def setUp(self):
    cache_opts = {
        'cache.type': 'memory',
        'cache.regions': 'public.data,public.filtered_data,public.shortlived'
    }
    CacheManager(**parse_cache_config_options(cache_opts))
    self.__request = DummyRequest()
    # Must set hook_zca to false to work with unittest_with_sqlite
    self.__config = testing.setUp(request=self.__request, hook_zca=False)
    defined_roles = [(Allow, RolesConstants.PII, ('view', 'logout'))]
    edauth.set_roles(defined_roles)
    set_tenant_map({get_unittest_tenant_name(): 'NC'})
    # Set up context security
    dummy_session = create_test_session([RolesConstants.PII])
    self.__config.testing_securitypolicy(dummy_session.get_user())
def setUp(self):
    self.reg = Registry()
    self.reg.settings = {EDWARE_PUBLIC_SECRET: SECRET}
    cache_opts = {
        'cache.type': 'memory',
        'cache.regions': 'public.data,public.filtered_data,public.shortlived,public.very_shortlived'
    }
    CacheManager(**parse_cache_config_options(cache_opts))
    self.__request = DummyRequest()
    # Must set hook_zca to false to work with unittest_with_sqlite
    self.__config = testing.setUp(registry=self.reg, request=self.__request, hook_zca=False)
    defined_roles = [(Allow, RolesConstants.PII, ('view', 'logout'))]
    edauth.set_roles(defined_roles)
    set_tenant_map_public_reports({get_unittest_tenant_name(): 'NC'})
    set_default_min_cell_size(0)  # so that it works with unittest
    # edcore module
    PublicDBConnection.CONFIG_NAMESPACE = 'edware.db'
def setUp(self):
    CacheManager(**parse_cache_config_options({
        'cache.regions': 'public.shortlived',
        'cache.type': 'memory',
        'cache.public.shortlived.expire': 7200
    }))
    path = os.path.join(os.path.abspath(os.path.dirname(__file__)), '../resources/meta/performance')
    static_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), '../resources/meta/static')
    component.provideUtility(
        PerfMetadataTemplateManager(asmt_meta_dir=path, static_asmt_meta_dir=static_path),
        IMetadataTemplateManager)
    static_json = os.path.join(static_path, 'ELA.static_asmt_metadata.json')
    with open(static_json) as f:
        self.__metadata = json.loads(f.read())
def test_flush_data(self):
    cache_opts = {
        'cache.type': 'memory',
        'cache.regions': 'public.data'
    }
    cache_manager = CacheManager(**parse_cache_config_options(cache_opts))
    params = {}
    params['stateCode'] = 'NC'
    mycache = cache_manager.get_cache('my_namespace', **params)
    f = FakeFunc('hello')
    cache = mycache.get("my_namespace {'stateCode': 'NC'}", createfunc=f.fake)
    self.assertEqual(cache, f.msg)
    f.msg = 'bye'
    self.assertNotEqual(cache, f.msg)
    cache_flush_data()
    f.msg = 'bye'
    mycache = cache_manager.get_cache('my_namespace', **params)
    cache = mycache.get("my_namespace {'stateCode': 'NC'}", createfunc=f.fake)
    self.assertEqual(cache, f.msg)
def setUp(self):
    super(MinificationTestCase, self).setUp()
    self.fixture_path = mkdtemp()
    self.touch_file('b.js')
    self.touch_file('b.css')
    self.touch_file('c.css')
    self.touch_file('c.js')
    os.mkdir(os.path.join(self.fixture_path, 'deep'))
    self.touch_file('deep/a.css')
    self.touch_file('deep/a.js')
    self.touch_file('deep/d.css')
    self.touch_file('deep/d.js')
    # set up a fake request with a beaker session
    pecan.core.state.request = Request.blank('/')
    self.cache = CacheManager(key=pecan.conf.cache['key'])
    pecan.core.state.request.environ['beaker.cache'] = self.cache
    pecan.conf.app.static_root = self.fixture_path
def setUp(self):
    reg = Registry()
    reg.settings = {EDWARE_PUBLIC_SECRET: SECRET}
    cache_opts = {
        'cache.type': 'memory',
        'cache.regions': 'public.data,public.filtered_data,public.shortlived,public.very_shortlived'
    }
    CacheManager(**parse_cache_config_options(cache_opts))
    self.__request = DummyRequest()
    self.__config = testing.setUp(registry=reg, request=self.__request, hook_zca=False)
    dummy_session = create_test_session(['SUPER_USER'], uid='272', tenant='cat')
    self.__config.testing_securitypolicy(dummy_session)
def __init__(self):
    cache_opts = {
        'cache.type': 'memory',
        'cache.data_dir': './applications/chimitheque/cache/',
        'cache.lock_dir': './applications/chimitheque/cache/'
    }
    #cache_opts = {
    #    'cache.type': 'memory'
    #    'cache.type': 'file'
    #}
    cache = CacheManager(**parse_cache_config_options(cache_opts))
    # the user id is used as a unique key to store the MESSAGE in the cache
    try:
        uid = 'cache_console_%s' % str(current.auth.user.id)
    except AttributeError:
        uid = ''
    # getting the cache
    self.tmpl_cache = cache.get_cache(uid, expire=None)
    self.logger = logging.getLogger('web2py.app.chimitheque')
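# Usage sketch for the console cache above (not from the original source): the per-user
# namespace created with cache.get_cache(uid, expire=None) can hold the pending MESSAGE.
# The key name 'MESSAGE' comes from the comment above; load_messages() and `console` are
# hypothetical names assumed for illustration.
def load_messages():
    return []  # placeholder createfunc; the real application would build the message list here

# assuming `console` is an instance of the class whose __init__ is shown above
messages = console.tmpl_cache.get_value(key='MESSAGE', createfunc=load_messages)
console.tmpl_cache.remove_value(key='MESSAGE')  # evict the entry once the message is consumed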
def init_cache(config):
    """
    Called by environment.py after most of the Pylons setup is done.
    """
    if config['beaker.cache.enabled']:
        from beaker.cache import CacheManager
        from beaker.util import parse_cache_config_options
        cache_manager = CacheManager(**parse_cache_config_options(config))
        # AllanC - no point in having buckets; these could be represented by a single cache and
        # managed by redis, but without putting them in a dict they can't be imported.
        # The dict serves as a reference to the constructed objects.
        for bucket in [
            'members', 'contents', 'contents_index', 'members_index',
            'messages_index', 'content_show', 'members_show'
        ]:
            _cache[bucket] = cache_manager.get_cache(bucket)
            if config['development_mode']:
                # We don't want to clear the cache on every server update. This could lead to the
                # server undergoing heavy load as ALL the cache is rebuilt.
                # This could be removed if it causes a problem.
                _cache[bucket].clear()
class HttpClient(object):
    cache_opts = {
        'cache.type': 'memory',
    }
    cache = CacheManager(**parse_cache_config_options(cache_opts))

    @cache.cache('get', expire=60)
    def get(self, url):
        try:
            logger.info('Fetching data from \'%s\'.', url)
            response = urllib.request.urlopen(url)
            content = response.read()
            encoding = response.headers['content-type'].split('charset')[-1]
            return content.decode(encoding)
        except Exception as e:
            logger.error('Error fetching data from \'%s\': %s', url, e)

    def refresh(self):
        self.cache.invalidate(self.get, 'get')
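# Hedged usage sketch, not part of the original class: @cache.cache('get', expire=60) memoizes
# get() per argument for 60 seconds, and refresh() invalidates that namespace so the next call
# goes back to the network. The URL below is illustrative only.
client = HttpClient()
first = client.get('http://example.com/')    # performs the HTTP request and caches the body
cached = client.get('http://example.com/')   # served from the in-memory cache, no new request
client.refresh()                             # clears the cached 'get' entries
fresh = client.get('http://example.com/')    # fetches again after invalidation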
def __init__(self): """initialize the class CacheManager More configuration refer to http://beaker.readthedocs.org/en/latest/caching.html#about """ # store the basic configuration self.cache_opts = { 'cache.type': 'memory', # can be "memory" or "file" 'cache.data_dir': '/tmp/cache/data', 'cache.lock_dir': '/tmp/cache/lock' } # create CacheManager instance with cache_opts self.cache = CacheManager( **parse_cache_config_options(self.cache_opts)) # In addition to the defaults supplied to the CacheManager instance, # any of the Cache options can be changed on a per-namespace basis, # by setting a type, and expire option. self.tmpl_cache = self.cache.get_cache('mytemplate', type='file', expire=3600)
def init_cache_manager(app):
    cache_opts = {'cache.expire': app.config.get('CACHE_EXPIRE', 3600)}
    if 'CACHE_TYPE' not in app.config or not app.config['CACHE_TYPE']:
        app.config['CACHE_TYPE'] = 'file'
    if app.config['CACHE_TYPE'] == 'file':
        if 'CACHE_ROOT' not in app.config or not app.config['CACHE_ROOT']:
            app.config['CACHE_ROOT'] = '/tmp/%s' % __name__
    cache_opts['cache.type'] = app.config['CACHE_TYPE']
    if 'CACHE_ROOT' in app.config and app.config['CACHE_ROOT']:
        cache_opts['cache.data_dir'] = app.config['CACHE_ROOT'] + '/data'
        cache_opts['cache.lock_dir'] = app.config['CACHE_ROOT'] + '/lock'
    if 'CACHE_URL' in app.config and app.config['CACHE_URL']:
        cache_opts['cache.url'] = app.config['CACHE_URL']
    cache = CacheManager(**parse_cache_config_options(cache_opts))
def setUp(self):
    self.reg = Registry()
    self.__temp_dir = tempfile.TemporaryDirectory()
    self.__work_zone_dir = os.path.join(self.__temp_dir.name, 'work_zone')
    self.__raw_data_base_dir = os.path.join(self.__temp_dir.name, 'raw_data')
    self.__item_level_base_dir = os.path.join(self.__temp_dir.name, 'item_level')
    self.reg.settings = {
        'extract.work_zone_base_dir': self.__work_zone_dir,
        'hpz.file_upload_base_url': 'http://somehost:82/files',
        'extract.available_grades': '3,4,5,6,7,8,11',
        'extract.raw_data_base_dir': self.__raw_data_base_dir,
        'extract.item_level_base_dir': self.__item_level_base_dir
    }
    settings = {'extract.celery.CELERY_ALWAYS_EAGER': True}
    setup_celery(settings)
    cache_opts = {
        'cache.type': 'memory',
        'cache.regions': 'public.data,public.filtered_data,public.shortlived'
    }
    CacheManager(**parse_cache_config_options(cache_opts))
    # Set up user context
    self.__request = DummyRequest()
    # Must set hook_zca to false to work with unittest_with_sqlite
    self.__config = testing.setUp(registry=self.reg, request=self.__request, hook_zca=False)
    defined_roles = [
        (Allow, RolesConstants.SAR_EXTRACTS, ('view', 'logout')),
        (Allow, RolesConstants.AUDIT_XML_EXTRACTS, ('view', 'logout')),
        (Allow, RolesConstants.ITEM_LEVEL_EXTRACTS, ('view', 'logout'))
    ]
    edauth.set_roles(defined_roles)
    dummy_session = create_test_session([
        RolesConstants.SAR_EXTRACTS,
        RolesConstants.AUDIT_XML_EXTRACTS,
        RolesConstants.ITEM_LEVEL_EXTRACTS
    ])
    self.__config.testing_securitypolicy(dummy_session.get_user())
    set_tenant_map({get_unittest_tenant_name(): 'NC'})
def prepareData(self, controller, container):
    container.totalPostsCount = 0
    mstat = False
    vts = False
    userStats = (0, 0)
    chTime = g.OPT.statsCacheTime
    if chTime > 0:
        cm = CacheManager(type='memory')
        cch = cm.get_cache('home_stats')
        container.totalPostsCount = cch.get_value(key="totalPosts", createfunc=Post.getPostsCount, expiretime=chTime)
        mstat = cch.get_value(key="mainStats", createfunc=Tag.getStats, expiretime=chTime)
        userStats = cch.get_value(key="userStats", createfunc=User.getStats, expiretime=chTime)
        vts = cch.get_value(key="vitalSigns", createfunc=Post.vitalSigns, expiretime=chTime)
    else:
        container.totalPostsCount = Post.getPostsCount()
        userStats = User.getStats()
        mstat = Tag.getStats()
        vts = Post.vitalSigns()

    def taglistcmp(a, b):
        return cmp(b.count, a.count) or cmp(a.board.tag, b.board.tag)

    container.totalUsersCount = userStats[0]
    container.bannedUsersCount = userStats[1]
    container.boards = sorted(mstat.boards, taglistcmp)
    container.tags = sorted(mstat.tags, taglistcmp)
    container.stags = sorted(mstat.stags, taglistcmp)
    container.totalBoardsThreads = mstat.totalBoardsThreads
    container.totalBoardsPosts = mstat.totalBoardsPosts
    container.totalTagsThreads = mstat.totalTagsThreads
    container.totalTagsPosts = mstat.totalTagsPosts
    container.totalSTagsThreads = mstat.totalSTagsThreads
    container.totalSTagsPosts = mstat.totalSTagsPosts
    container.last1KUsersCount = vts.last1KUsersCount
    container.prev1KUsersCount = vts.prev1KUsersCount
    container.lastWeekMessages = vts.lastWeekMessages
    container.prevWeekMessages = vts.prevWeekMessages
def checkCond(self, userInst):
    """
    print self.__getUserAge(userInst)
    print self.__getAllPostsCount(userInst)
    print self.__getRecentPostsCount(userInst)
    print self.__checkLastGivenInvite(userInst)
    """
    obligatoryCondition = not(userInst.Anonymous) and \
        (self.__getUserAge(userInst) >= g.OPT.minimalAge) and \
        (self.__getAllPostsCount(userInst) >= g.OPT.minimalPostsCount) and \
        (self.__checkLastGivenInvite(userInst))
    if obligatoryCondition:
        recentPostsCount = self.__getRecentPostsCount(userInst)
        cm = CacheManager(type='memory')
        cch = cm.get_cache('home_stats')
        cacheTime = getattr(g.OPT, 'statsCacheTime', 30)
        vts = cch.get_value(key="vitalSigns", createfunc=Post.vitalSigns, expiretime=cacheTime)
        return (recentPostsCount >= g.OPT.minimalRecentPostsCount) and \
            (100.0 * recentPostsCount) / vts.lastWeekMessages >= g.OPT.recentPostsPercentage
    return None
def __init__(self, app, cookie_cache=True, cache_opts=None, debug=False,
             cookie_max_age=0, classifiers=[]):
    self.debug = debug
    self.cookie_cache = cookie_cache
    cache_manager = CacheManager(**parse_cache_config_options(
        cache_opts or self.DEFAULT_CACHE_OPTIONS))
    self.cache = cache_manager.get_cache('mobi.devices')
    if self.debug:
        logger.info(u'MobiDeviceMiddleware start in debug mode.')
    self.app = app
    self.set_cookie_max_age(int(cookie_max_age))
    if not isinstance(classifiers, list):
        classifiers = [classifiers]
    self.classifiers = classifiers
def setUp(self):
    cache_opts = {
        'cache.type': 'memory',
        'cache.regions': 'public.shortlived'
    }
    CacheManager(**parse_cache_config_options(cache_opts))
    self.__request = DummyRequest()
    # Must set hook_zca to false to work with unittest_with_sqlite
    self.__config = testing.setUp(request=self.__request, hook_zca=False)
    defined_roles = [(Allow, RolesConstants.PII, ('view', 'logout'))]
    edauth.set_roles(defined_roles)
    set_tenant_map({get_unittest_tenant_name(): 'NC'})
    # Set up context security
    dummy_session = Session()
    dummy_session.set_user_context([
        RoleRelation(RolesConstants.PII, get_unittest_tenant_name(), "NC", "228", "242")
    ])
    dummy_session.set_uid('a5ddfe12-740d-4487-9179-de70f6ac33be')
    self.__config.testing_securitypolicy(dummy_session.get_user())
def setup_caching(config):
    cache_type = config.get('caching', 'type')
    data_dir = config.get('caching', 'data_dir')
    short_expire = config.get('caching', 'short_expire')
    long_expire = config.get('caching', 'long_expire')
    cache_opts = {
        'cache.type': cache_type,
        'cache.data_dir': data_dir,
        'cache.lock_dir': data_dir,
        'cache.regions': 'short_term, long_term',
        'cache.short_term.type': cache_type,
        'cache.short_term.expire': short_expire,
        'cache.long_term.type': cache_type,
        'cache.long_term.expire': long_expire,
    }
    cache_manager = CacheManager(**parse_cache_config_options(cache_opts))
    short_term_cache = cache_manager.get_cache('short_term', expire=short_expire)
    long_term_cache = cache_manager.get_cache('long_term', expire=long_expire)
    return short_term_cache, long_term_cache
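# Hedged example (not in the original file): setup_caching() expects an INI-style config with a
# [caching] section. The values below are illustrative; configparser stores them as strings,
# which is exactly what config.get() would return when reading a real INI file.
import configparser

config = configparser.ConfigParser()
config['caching'] = {
    'type': 'memory',
    'data_dir': '/tmp/app_cache',  # unused by the memory backend but still read by setup_caching()
    'short_expire': '300',
    'long_expire': '86400',
}
short_term, long_term = setup_caching(config)
greeting = short_term.get_value(key='greeting', createfunc=lambda: 'hello')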
def setUp(self):
    cache_opts = {
        'cache.type': 'memory',
        'cache.regions': 'public.data,public.filtered_data,public.shortlived,public.very_shortlived'
    }
    CacheManager(**parse_cache_config_options(cache_opts))
    self.__request = DummyRequest()
    self.__tenant_name = get_unittest_tenant_name()
    set_tenant_map({self.__tenant_name: "NC"})
    # Must set hook_zca to false to work with unittest_with_sqlite.
    self.__config = testing.setUp(request=self.__request, hook_zca=False)
    defined_roles = [(Allow, 'PII', ('view', 'logout'))]
    edauth.set_roles(defined_roles)
    # Set up context security
    dummy_session = create_test_session(['PII'])
    dummy_session.set_user_context([
        RoleRelation("PII", get_unittest_tenant_name(), "NC", "228", None)
    ])
    self.__config.testing_securitypolicy(dummy_session.get_user())
    set_default_min_cell_size(0)
def __init__(self, **kwargs):
    _handlers = [
        (r"/cmcc", cmcc_portal.HomeHandler),
        (r"/cmcc/login", cmcc_portal.LoginHandler),
        (r"/cmcc/logout", cmcc_portal.LogoutHandler),
        (r"/huawei", cmcc_portal.HomeHandler),
        (r"/huawei/login", cmcc_portal.LoginHandler),
        (r"/huawei/logout", cmcc_portal.LogoutHandler),
    ]
    server = kwargs.pop("server")
    settings = dict(
        cookie_secret="12oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=",
        login_url="/login",
        template_path=os.path.join(os.path.dirname(__file__), "portal/views"),
        static_path=os.path.join(os.path.dirname(__file__), "static"),
        xsrf_cookies=True,
        debug=kwargs.get("debug", False),
        share_secret=server.share_secret,
        ac_addr=(server.ac[0], int(server.ac[1]))
    )
    self.cache = CacheManager(**parse_cache_config_options({
        'cache.type': 'file',
        'cache.data_dir': '/tmp/cache/data',
        'cache.lock_dir': '/tmp/cache/lock'
    }))
    self.tp_lookup = TemplateLookup(
        directories=[settings['template_path']],
        default_filters=['decode.utf8'],
        input_encoding='utf-8',
        output_encoding='utf-8',
        encoding_errors='replace',
        module_directory="/tmp/portal")
    cyclone.web.Application.__init__(self, _handlers, **settings)
def load_config():
    """
    Load configuration.
    :return:
    """
    # print(environment)
    config = ConfigParser.ConfigParser()
    here = os.path.abspath(os.path.dirname(__file__))
    config_file = os.path.join(here, '../' + environment + '.ini')
    config.read(config_file)
    # Global configuration parameters
    nltk.data.path.append(config.get('nltk', 'data_dir'))
    nlpnet.set_data_dir(config.get('nlpnet', 'data_dir'))
    # Logging
    logging.config.fileConfig(config_file)
    # Cache configuration
    cache_opts = {
        'cache.regions': config.get('lbsociam', 'cache.regions'),
        'cache.type': config.get('lbsociam', 'cache.type'),
        'cache.short_term.expire': config.get('lbsociam', 'cache.short_term.expire'),
        'cache.default_term.expire': config.get('lbsociam', 'cache.default_term.expire'),
        'cache.long_term.expire': config.get('lbsociam', 'cache.long_term.expire')
    }
    cache = CacheManager(**parse_cache_config_options(cache_opts))
    return config
def setUp(self):
    cache_opts = {
        'cache.type': 'memory',
        'cache.regions': 'public.shortlived'
    }
    CacheManager(**parse_cache_config_options(cache_opts))
    self.__request = DummyRequest()
    # Must set hook_zca to false to work with unittest_with_sqlite
    reg = Registry()
    # Set up defined roles
    self.__tenant_name = get_unittest_tenant_name()
    set_tenant_map({self.__tenant_name: "NC"})
    self.__temp_dir = tempfile.mkdtemp()
    reg.settings = {}
    reg.settings['pdf.report_base_dir'] = self.__temp_dir
    self.__config = testing.setUp(registry=reg, request=self.__request, hook_zca=False)
    defined_roles = [(Allow, RolesConstants.PII, ('view', 'logout'))]
    edauth.set_roles(defined_roles)
    # Set up context security
    dummy_session = create_test_session([RolesConstants.PII])
    dummy_session.set_user_context([
        RoleRelation(RolesConstants.PII, self.__tenant_name, 'NC', '228', '242'),
        RoleRelation(RolesConstants.PII, self.__tenant_name, 'NC', '229', '939')
    ])
    self.__config.testing_securitypolicy(dummy_session.get_user())
    # celery settings for UT
    settings = {'services.celery.CELERY_ALWAYS_EAGER': True}
    self.__request.matchdict[Constants.REPORT] = 'indivStudentReport.html'
    self.__request.cookies = {'edware': '123'}
    setup_celery(settings)
    'cache.data_dir': '/tmp/cache/data',
    'cache.lock_dir': '/tmp/cache/lock'
}

# Create cache regions
cache_regions.update({
    'day_term': {
        'expire': 86400
    },
    'hour_term': {
        'expire': 3600
    }
})

# Create cache for this instance
cache = CacheManager(**parse_cache_config_options(cache_opts))


class APIManager:
    def __init__(self):
        atexit.register(self.__on_exit)
        # Load the bill texts from the persistent cache if it's less than a week old
        self.bill_texts = self.__load_pickle("pickles/billtext_cache.p")
        self.bill_subjects = self.__load_pickle("pickles/billsubject_cache.p")
        # Since I have a limited number of API calls to LegiScan, keep track of the number of calls we're making
        self.legiscan_call_count = 0

    def get_cached_bill_texts(self):
        return self.bill_texts
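# Hedged sketch (not from the original file): with the 'day_term' and 'hour_term' regions
# registered above, Beaker's region decorator can memoize a function against either region.
# Whether the original project uses it this way is an assumption; fetch_bill_text() is a
# hypothetical helper and 'HB1001' is an illustrative bill id.
from beaker.cache import cache_region

@cache_region('hour_term')
def fetch_bill_text(bill_id):
    # a real implementation would call the LegiScan API here; results expire after 3600 seconds
    return {'bill_id': bill_id, 'text': ''}

bill = fetch_bill_text('HB1001')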
def __init__(self, config):
    self.cache = CacheManager(**parse_cache_config_options(config))
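# Hedged usage sketch: the constructor above accepts any flat Beaker option mapping and keeps
# the resulting CacheManager on self.cache. The class name CachedStore and the option values
# below are illustrative, not taken from the original code.
store = CachedStore({
    'cache.type': 'memory',
    'cache.expire': 3600,
})
sessions = store.cache.get_cache('sessions')
sessions.put('abc123', {'user': 'alice'})   # store a raw value in the 'sessions' namespace
print(sessions.get('abc123'))               # returns the cached dict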
class AtlasProxy(BaseProxy): """ Atlas Proxy client for the amundsen metadata {ATLAS_API_DOCS} = https://atlas.apache.org/api/v2/ """ TABLE_ENTITY = 'Table' DB_ATTRIBUTE = 'db' STATISTICS_FORMAT_SPEC = app.config['STATISTICS_FORMAT_SPEC'] TABLE_TYPE = 'Table' BOOKMARK_TYPE = 'Bookmark' USER_TYPE = 'User' READER_TYPE = 'Reader' QN_KEY = 'qualifiedName' BOOKMARK_ACTIVE_KEY = 'active' GUID_KEY = 'guid' ATTRS_KEY = 'attributes' REL_ATTRS_KEY = 'relationshipAttributes' ENTITY_URI_KEY = 'entityUri' # Qualified Name of the Glossary, that holds the user defined terms. # For Amundsen, we are using Glossary Terms as the Tags. AMUNDSEN_USER_TAGS = 'amundsen_user_tags' _CACHE = CacheManager(**parse_cache_config_options({'cache.regions': 'atlas_proxy', 'cache.atlas_proxy.type': 'memory', 'cache.atlas_proxy.expire': _ATLAS_PROXY_CACHE_EXPIRY_SEC})) def __init__(self, *, host: str, port: int, user: str = 'admin', password: str = '', encrypted: bool = False, validate_ssl: bool = False, client_kwargs: dict = dict()) -> None: """ Initiate the Apache Atlas client with the provided credentials """ protocol = 'https' if encrypted else 'http' self.client = AtlasClient(f'{protocol}://{host}:{port}', (user, password)) self.client.session.verify = validate_ssl def _extract_info_from_uri(self, *, table_uri: str) -> Dict: """ Extracts the table information from table_uri coming from frontend. :param table_uri: :return: Dictionary object, containing following information: entity: Type of entity example: rdbms_table, hive_table etc. cluster: Cluster information db: Database Name name: Table Name """ pattern = re.compile(r""" ^ (?P<entity>.*?) :\/\/ (?P<cluster>.*) \. (?P<db>.*?) \/ (?P<name>.*?) $ """, re.X) result = pattern.match(table_uri) return result.groupdict() if result else dict() def _parse_bookmark_qn(self, bookmark_qn: str) -> Dict: """ Parse bookmark qualifiedName and extract the info :param bookmark_qn: Qualified Name of Bookmark entity :return: Dictionary object containing following information: cluster: cluster information db: Database name name: Table name """ pattern = re.compile(r""" ^(?P<db>[^.]*) \. (?P<table>[^.]*) \. (?P<entity_type>[^.]*) \. (?P<user_id>[^.]*)\.bookmark \@ (?P<cluster>.*) $ """, re.X) result = pattern.match(bookmark_qn) return result.groupdict() if result else dict() def _get_user_details(self, user_id: str) -> Dict: """ Helper function to help get the user details if the `USER_DETAIL_METHOD` is configured, else uses the user_id for both email and user_id properties. :param user_id: The Unique user id of a user entity :return: a dictionary of user details """ if app.config.get('USER_DETAIL_METHOD'): user_details = app.config.get('USER_DETAIL_METHOD')(user_id) # type: ignore else: user_details = {'email': user_id, 'user_id': user_id} return user_details def _get_table_entity(self, *, table_uri: str) -> AtlasEntityWithExtInfo: """ Fetch information from table_uri and then find the appropriate entity :param table_uri: The table URI coming from Amundsen Frontend :return: A table entity matching the Qualified Name derived from table_uri """ table_info = self._extract_info_from_uri(table_uri=table_uri) table_qn = make_table_qualified_name(table_info.get('name', ''), table_info.get('cluster'), table_info.get('db') ) try: return self.client.entity.get_entity_by_attribute(type_name=table_info['entity'], uniq_attributes=[(self.QN_KEY, table_qn)]) except Exception as ex: LOGGER.exception(f'Table not found. 
{str(ex)}') raise NotFoundException('Table URI( {table_uri} ) does not exist' .format(table_uri=table_uri)) def _get_user_entity(self, user_id: str) -> AtlasEntityWithExtInfo: """ Fetches an user entity from an id :param user_id: User ID :return: A User entity matching the user_id """ try: return self.client.entity.get_entity_by_attribute(type_name=self.USER_TYPE, uniq_attributes=[(self.QN_KEY, user_id)]) except Exception as ex: raise NotFoundException('(User {user_id}) does not exist' .format(user_id=user_id)) def _create_bookmark(self, entity: AtlasEntityWithExtInfo, user_guid: str, bookmark_qn: str, table_uri: str) -> None: """ Creates a bookmark entity for a specific user and table uri. :param user_guid: User's guid :param bookmark_qn: Bookmark qualifiedName :return: """ bookmark_entity = { 'entity': { 'typeName': self.BOOKMARK_TYPE, 'attributes': {'qualifiedName': bookmark_qn, self.BOOKMARK_ACTIVE_KEY: True, 'entityUri': table_uri, 'user': {'guid': user_guid}, 'entity': {'guid': entity.entity[self.GUID_KEY]}} } } bookmark_entity = type_coerce(bookmark_entity, AtlasEntityWithExtInfo) self.client.entity.create_entity(bookmark_entity) def _get_bookmark_entity(self, entity_uri: str, user_id: str) -> AtlasEntityWithExtInfo: """ Fetch a Bookmark entity from parsing table uri and user id. If Bookmark is not present, create one for the user. :param table_uri: :param user_id: Qualified Name of a user :return: """ table_info = self._extract_info_from_uri(table_uri=entity_uri) bookmark_qn = '{}.{}.{}.{}.bookmark@{}'.format(table_info.get('db'), table_info.get('name'), table_info.get('entity'), user_id, table_info.get('cluster')) try: bookmark_entity = self.client.entity.get_entity_by_attribute(type_name=self.BOOKMARK_TYPE, uniq_attributes=[(self.QN_KEY, bookmark_qn)]) except Exception as ex: LOGGER.exception(f'Bookmark not found. {str(ex)}') table_entity = self._get_table_entity(table_uri=entity_uri) # Fetch user entity from user_id for relation user_entity = self._get_user_entity(user_id) # Create bookmark entity with the user relation. self._create_bookmark(table_entity, user_entity.entity[self.GUID_KEY], bookmark_qn, entity_uri) # Fetch bookmark entity after creating it. bookmark_entity = self.client.entity.get_entity_by_attribute(type_name=self.BOOKMARK_TYPE, uniq_attributes=[(self.QN_KEY, bookmark_qn)]) return bookmark_entity def _get_column(self, *, table_uri: str, column_name: str) -> Dict: """ Fetch the column information from referredEntities of the table entity :param table_uri: :param column_name: :return: A dictionary containing the column details """ try: table_entity = self._get_table_entity(table_uri=table_uri) columns = table_entity.entity[self.REL_ATTRS_KEY].get('columns') for column in columns or list(): col_details = table_entity.referredEntities[column[self.GUID_KEY]] if column_name == col_details[self.ATTRS_KEY]['name']: return col_details raise NotFoundException(f'Column not found: {column_name}') except KeyError as ex: LOGGER.exception(f'Column not found: {str(ex)}') raise NotFoundException(f'Column not found: {column_name}') def _serialize_columns(self, *, entity: AtlasEntityWithExtInfo) -> \ Union[List[Column], List]: """ Helper function to fetch the columns from entity and serialize them using Column and Stat model. :param entity: AtlasEntityWithExtInfo object, along with relationshipAttributes :return: A list of Column objects, if there are any columns available, else an empty list. 
""" columns = list() for column in entity.entity[self.REL_ATTRS_KEY].get('columns') or list(): column_status = column.get('entityStatus', 'inactive').lower() if column_status != 'active': continue col_entity = entity.referredEntities[column[self.GUID_KEY]] col_attrs = col_entity[self.ATTRS_KEY] statistics = list() badges = list() for column_classification in col_entity.get('classifications') or list(): if column_classification.get('entityStatus') == Status.ACTIVE: name = column_classification.get('typeName') badges.append(Badge(badge_name=name, category='default')) for stats in col_attrs.get('statistics') or list(): stats_attrs = stats['attributes'] stat_type = stats_attrs.get('stat_name') stat_format = self.STATISTICS_FORMAT_SPEC.get(stat_type, dict()) if not stat_format.get('drop', False): stat_type = stat_format.get('new_name', stat_type) stat_val = stats_attrs.get('stat_val') format_val = stat_format.get('format') if format_val: stat_val = format_val.format(stat_val) else: stat_val = str(stat_val) start_epoch = stats_attrs.get('start_epoch') end_epoch = stats_attrs.get('end_epoch') statistics.append( Stat( stat_type=stat_type, stat_val=stat_val, start_epoch=start_epoch, end_epoch=end_epoch, ) ) columns.append( Column( name=col_attrs.get('name'), description=col_attrs.get('description') or col_attrs.get('comment'), col_type=col_attrs.get('type') or col_attrs.get('dataType') or col_attrs.get('data_type'), sort_order=col_attrs.get('position') or 9999, stats=statistics, badges=badges ) ) return sorted(columns, key=lambda item: item.sort_order) def _get_reports(self, guids: List[str]) -> List[ResourceReport]: reports = [] if guids: report_entities = self.client.entity.get_entities_by_guids(guids=guids) for report_entity in report_entities.entities: try: if report_entity.status == Status.ACTIVE: report_attrs = report_entity.attributes reports.append( ResourceReport( name=report_attrs['name'], url=report_attrs['url'] ) ) except (KeyError, AttributeError) as ex: LOGGER.exception('Error while accessing table report: {}. {}' .format(str(report_entity), str(ex))) parsed_reports = app.config['RESOURCE_REPORT_CLIENT'](reports) \ if app.config['RESOURCE_REPORT_CLIENT'] else reports return parsed_reports def _get_owners(self, data_owners: list, fallback_owner: str = None) -> List[User]: owners_detail = list() active_owners_list = list() active_owners = filter(lambda item: item['entityStatus'] == Status.ACTIVE and item['relationshipStatus'] == Status.ACTIVE, data_owners) for owner in active_owners: owner_qn = owner['displayText'] owner_data = self._get_user_details(owner_qn) owners_detail.append(User(**owner_data)) active_owners_list.append(owner_qn) # To avoid the duplication, # we are checking if the fallback is not in data_owners if fallback_owner and (fallback_owner not in active_owners_list): owners_detail.append(User(**self._get_user_details(fallback_owner))) return owners_detail def get_user(self, *, id: str) -> Union[UserEntity, None]: pass def get_users(self) -> List[UserEntity]: pass def get_table(self, *, table_uri: str) -> Table: """ Gathers all the information needed for the Table Detail Page. :param table_uri: :return: A Table object with all the information available or gathered from different entities. 
""" entity = self._get_table_entity(table_uri=table_uri) table_details = entity.entity try: attrs = table_details[self.ATTRS_KEY] programmatic_descriptions = self._get_programmatic_descriptions(attrs.get('parameters', dict())) table_qn = parse_table_qualified_name( qualified_name=attrs.get(self.QN_KEY) ) badges = [] # Using or in case, if the key 'classifications' is there with a None for classification in table_details.get('classifications') or list(): badges.append( Badge( badge_name=classification.get('typeName'), category="default" ) ) tags = [] for term in table_details.get(self.REL_ATTRS_KEY).get("meanings") or list(): if term.get('entityStatus') == Status.ACTIVE and \ term.get('relationshipStatus') == Status.ACTIVE: tags.append( Tag( tag_name=term.get("displayText"), tag_type="default" ) ) columns = self._serialize_columns(entity=entity) reports_guids = [report.get("guid") for report in attrs.get("reports") or list()] table_type = attrs.get('tableType') or 'table' is_view = 'view' in table_type.lower() readers = self._get_readers(table_details) table = Table( database=table_details.get('typeName'), cluster=table_qn.get('cluster_name', ''), schema=table_qn.get('db_name', ''), name=attrs.get('name') or table_qn.get("table_name", ''), badges=badges, tags=tags, description=attrs.get('description') or attrs.get('comment'), owners=self._get_owners( table_details[self.REL_ATTRS_KEY].get('ownedBy', []), attrs.get('owner')), resource_reports=self._get_reports(guids=reports_guids), columns=columns, is_view=is_view, table_readers=readers, last_updated_timestamp=self._parse_date(table_details.get('updateTime')), programmatic_descriptions=programmatic_descriptions, watermarks=self._get_table_watermarks(table_details)) return table except KeyError as ex: LOGGER.exception('Error while accessing table information. {}' .format(str(ex))) raise BadRequest('Some of the required attributes ' 'are missing in : ( {table_uri} )' .format(table_uri=table_uri)) @staticmethod def _validate_date(text_date: str, date_format: str) -> Tuple[Optional[datetime.datetime], Optional[str]]: try: return datetime.datetime.strptime(text_date, date_format), date_format except (ValueError, TypeError): return None, None @staticmethod def _select_watermark_format(partition_names: List[str]) -> Optional[str]: result = None for partition_name in partition_names: # Assume that all partitions for given table have the same date format. Only thing that needs to be done # is establishing which format out of the supported ones it is and then we validate every partition # against it. 
for df in app.config['WATERMARK_DATE_FORMATS']: _, result = AtlasProxy._validate_date(partition_name, df) if result: LOGGER.debug('Established date format', extra=dict(date_format=result)) return result return result @staticmethod def _render_partition_key_name(entity: AtlasEntityWithExtInfo) -> Optional[str]: _partition_keys = [] for partition_key in entity.get('attributes', dict()).get('partitionKeys', []): partition_key_column_name = partition_key.get('displayName') if partition_key_column_name: _partition_keys.append(partition_key_column_name) partition_key = ' '.join(_partition_keys).strip() return partition_key def _get_table_watermarks(self, entity: AtlasEntityWithExtInfo) -> List[Watermark]: partition_value_format = '%Y-%m-%d %H:%M:%S' _partitions = entity.get('relationshipAttributes', dict()).get('partitions', list()) names = [_partition.get('displayText') for _partition in _partitions if _partition.get('entityStatus') == Status.ACTIVE and _partition.get('relationshipStatus') == Status.ACTIVE] if not names: return [] partition_key = self._render_partition_key_name(entity) watermark_date_format = self._select_watermark_format(names) partitions = {} for _partition in _partitions: partition_name = _partition.get('displayText') if partition_name and watermark_date_format: partition_date, _ = self._validate_date(partition_name, watermark_date_format) if partition_date: common_values = {'partition_value': datetime.datetime.strftime(partition_date, partition_value_format), 'create_time': 0, 'partition_key': partition_key} partitions[partition_date] = common_values if partitions: low_watermark_date = min(partitions.keys()) high_watermark_date = max(partitions.keys()) low_watermark = Watermark(watermark_type='low_watermark', **partitions.get(low_watermark_date)) high_watermark = Watermark(watermark_type='high_watermark', **partitions.get(high_watermark_date)) return [low_watermark, high_watermark] else: return [] def delete_owner(self, *, table_uri: str, owner: str) -> None: """ :param table_uri: :param owner: :return: """ table = self._get_table_entity(table_uri=table_uri) table_entity = table.entity if table_entity[self.REL_ATTRS_KEY].get("ownedBy"): try: active_owners = filter(lambda item: item['relationshipStatus'] == Status.ACTIVE and item['displayText'] == owner, table_entity[self.REL_ATTRS_KEY]['ownedBy']) if list(active_owners): self.client.relationship.delete_relationship_by_guid( guid=next(active_owners).get('relationshipGuid') ) else: raise BadRequest('You can not delete this owner.') except Exception as ex: LOGGER.exception('Error while removing table data owner. {}' .format(str(ex))) def add_owner(self, *, table_uri: str, owner: str) -> None: """ Query on Atlas User entity to find if the entity exist for the owner string in parameter, if not create one. And then use that User entity's GUID and add a relationship between Table and User, on ownedBy field. :param table_uri: :param owner: Email address of the owner :return: None, as it simply adds the owner. 
""" owner_info = self._get_user_details(owner) if not owner_info: raise NotFoundException(f'User "{owner}" does not exist.') user_dict = type_coerce({ "entity": { "typeName": "User", "attributes": {"qualifiedName": owner}, } }, AtlasEntityWithExtInfo) # Get or Create a User user_entity = self.client.entity.create_entity(user_dict) user_guid = next(iter(user_entity.guidAssignments.values())) table = self._get_table_entity(table_uri=table_uri) entity_def = { "typeName": "DataSet_Users_Owner", "end1": { "guid": table.entity.get("guid"), "typeName": "Table", }, "end2": { "guid": user_guid, "typeName": "User", }, } try: relationship = type_coerce(entity_def, AtlasRelationship) self.client.relationship.create_relationship(relationship=relationship) except Exception as ex: LOGGER.exception('Error while adding the owner information. {}' .format(str(ex))) raise BadRequest(f'User {owner} is already added as a data owner for ' f'table {table_uri}.') def get_table_description(self, *, table_uri: str) -> Union[str, None]: """ :param table_uri: :return: The description of the table as a string """ entity = self._get_table_entity(table_uri=table_uri) return entity.entity[self.ATTRS_KEY].get('description') def put_table_description(self, *, table_uri: str, description: str) -> None: """ Update the description of the given table. :param table_uri: :param description: Description string :return: None """ table = self._get_table_entity(table_uri=table_uri) self.client.entity.partial_update_entity_by_guid( entity_guid=table.entity.get("guid"), attr_value=description, attr_name='description' ) @_CACHE.cache('_get_user_defined_glossary_guid') def _get_user_defined_glossary_guid(self) -> str: """ This function look for a user defined glossary i.e., self.ATLAS_USER_DEFINED_TERMS If there is not one available, this will create a new glossary. The meain reason to put this functionality into a separate function is to avoid the lookup each time someone assigns a tag to a data source. :return: Glossary object, that holds the user defined terms. """ # Check if the user glossary already exists glossaries = self.client.glossary.get_all_glossaries() for glossary in glossaries: if glossary.get(self.QN_KEY) == self.AMUNDSEN_USER_TAGS: return glossary[self.GUID_KEY] # If not already exists, create one glossary_def = AtlasGlossary({"name": self.AMUNDSEN_USER_TAGS, "shortDescription": "Amundsen User Defined Terms"}) glossary = self.client.glossary.create_glossary(glossary_def) return glossary.guid @_CACHE.cache('_get_create_glossary_term') def _get_create_glossary_term(self, term_name: str) -> Union[AtlasGlossaryTerm, AtlasEntityHeader]: """ Since Atlas does not provide any API to find a term directly by a qualified name, we need to look for AtlasGlossaryTerm via basic search, if found then return, else create a new glossary term under the user defined glossary. :param term_name: Name of the term. NOTE: this is different from qualified name. :return: Term Object. 
""" params = { 'typeName': "AtlasGlossaryTerm", 'excludeDeletedEntities': True, 'includeSubTypes': True, 'attributes': ["assignedEntities", ], 'entityFilters': {'condition': "AND", 'criterion': [{'attributeName': "name", 'operator': "=", 'attributeValue': term_name}] } } result = self.client.discovery.faceted_search(search_parameters=params) if result.approximateCount: term = result.entities[0] else: glossary_guid = self._get_user_defined_glossary_guid() glossary_def = AtlasGlossaryHeader({'glossaryGuid': glossary_guid}) term_def = AtlasGlossaryTerm({'name': term_name, 'anchor': glossary_def}) term = self.client.glossary.create_glossary_term(term_def) return term def add_tag(self, *, id: str, tag: str, tag_type: str = "default", resource_type: ResourceType = ResourceType.Table) -> None: """ Assign the Glossary Term to the give table. If the term is not there, it will create a new term under the Glossary self.ATLAS_USER_DEFINED_TERMS :param id: Table URI / Dashboard ID etc. :param tag: Tag Name :param tag_type :return: None """ entity = self._get_table_entity(table_uri=id) term = self._get_create_glossary_term(tag) related_entity = AtlasRelatedObjectId({self.GUID_KEY: entity.entity[self.GUID_KEY], "typeName": resource_type.name}) self.client.glossary.assign_term_to_entities(term.guid, [related_entity]) def add_badge(self, *, id: str, badge_name: str, category: str = '', resource_type: ResourceType) -> None: # Not implemented raise NotImplementedError def delete_tag(self, *, id: str, tag: str, tag_type: str, resource_type: ResourceType = ResourceType.Table) -> None: """ Removes the Glossary Term assignment from the provided source. :param id: Table URI / Dashboard ID etc. :param tag: Tag Name :return:None """ entity = self._get_table_entity(table_uri=id) term = self._get_create_glossary_term(tag) if not term: return assigned_entities = self.client.glossary.get_entities_assigned_with_term(term.guid, "ASC", -1, 0) for item in assigned_entities or list(): if item.get(self.GUID_KEY) == entity.entity[self.GUID_KEY]: related_entity = AtlasRelatedObjectId(item) return self.client.glossary.disassociate_term_from_entities(term.guid, [related_entity]) def delete_badge(self, *, id: str, badge_name: str, category: str, resource_type: ResourceType) -> None: # Not implemented raise NotImplementedError def put_column_description(self, *, table_uri: str, column_name: str, description: str) -> None: """ :param table_uri: :param column_name: Name of the column to update the description :param description: The description string :return: None, as it simply updates the description of a column """ column_detail = self._get_column( table_uri=table_uri, column_name=column_name) col_guid = column_detail[self.GUID_KEY] self.client.entity.partial_update_entity_by_guid( entity_guid=col_guid, attr_value=description, attr_name='description' ) def get_column_description(self, *, table_uri: str, column_name: str) -> Union[str, None]: """ :param table_uri: :param column_name: :return: The column description using the referredEntities information of a table entity """ column_detail = self._get_column( table_uri=table_uri, column_name=column_name) return column_detail[self.ATTRS_KEY].get('description') def _serialize_popular_tables(self, entities: list) -> List[PopularTable]: """ Gets a list of entities and serialize the popular tables. 
:param entities: List of entities from atlas client :return: a list of PopularTable objects """ popular_tables = list() for table in entities: table_attrs = table.attributes table_qn = parse_table_qualified_name( qualified_name=table_attrs.get(self.QN_KEY) ) table_name = table_qn.get("table_name") or table_attrs.get('name') db_name = table_qn.get("db_name", '') db_cluster = table_qn.get("cluster_name", '') popular_table = PopularTable( database=table.typeName, cluster=db_cluster, schema=db_name, name=table_name, description=table_attrs.get('description') or table_attrs.get('comment')) popular_tables.append(popular_table) return popular_tables def get_popular_tables(self, *, num_entries: int, user_id: Optional[str] = None) -> List[PopularTable]: """ Generates a list of Popular tables to be shown on the home page of Amundsen. :param num_entries: Number of popular tables to fetch :return: A List of popular tables instances """ popular_query_params = {'typeName': 'Table', 'sortBy': 'popularityScore', 'sortOrder': 'DESCENDING', 'excludeDeletedEntities': True, 'limit': num_entries} search_results = self.client.discovery.faceted_search(search_parameters=popular_query_params) return self._serialize_popular_tables(search_results.entities) def get_latest_updated_ts(self) -> int: date = None metrics = self.client.admin.get_metrics() try: date = self._parse_date(metrics.general.get('stats', {}).get('Notification:lastMessageProcessedTime')) except AttributeError: pass return date or 0 def get_tags(self) -> List: """ Fetch all the glossary terms from atlas, along with their assigned entities as this will be used to generate the autocomplete on the table detail page :return: A list of TagDetail Objects """ tags = [] params = { 'typeName': "AtlasGlossaryTerm", 'limit': 1000, 'offset': 0, 'excludeDeletedEntities': True, 'includeSubTypes': True, 'attributes': ["assignedEntities", ] } glossary_terms = self.client.discovery.faceted_search(search_parameters=params) for item in glossary_terms.entities or list(): tags.append( TagDetail( tag_name=item.attributes.get("name"), tag_count=len(item.attributes.get("assignedEntities")) ) ) return tags def get_badges(self) -> List: badges = list() metrics = self.client.admin.get_metrics() try: system_badges = metrics["tag"].get("tagEntities").keys() for item in system_badges: badges.append( Badge(badge_name=item, category="default") ) except AttributeError: LOGGER.info("No badges/classifications available in the system.") return badges def _get_resources_followed_by_user(self, user_id: str, resource_type: str) \ -> List[Union[PopularTable, DashboardSummary]]: """ ToDo (Verdan): Dashboard still needs to be implemented. Helper function to get the resource, table, dashboard etc followed by a user. :param user_id: User ID of a user :param resource_type: Type of a resource that returns, could be table, dashboard etc. :return: A list of PopularTable, DashboardSummary or any other resource. 
""" params = { 'typeName': self.BOOKMARK_TYPE, 'offset': '0', 'limit': '1000', 'excludeDeletedEntities': True, 'entityFilters': { 'condition': 'AND', 'criterion': [ { 'attributeName': self.QN_KEY, 'operator': 'contains', 'attributeValue': f'.{user_id}.bookmark' }, { 'attributeName': self.BOOKMARK_ACTIVE_KEY, 'operator': 'eq', 'attributeValue': 'true' } ] }, 'attributes': ['count', self.QN_KEY, self.ENTITY_URI_KEY] } # Fetches the bookmark entities based on filters search_results = self.client.discovery.faceted_search(search_parameters=params) resources = [] for record in search_results.entities: table_info = self._extract_info_from_uri(table_uri=record.attributes[self.ENTITY_URI_KEY]) res = self._parse_bookmark_qn(record.attributes[self.QN_KEY]) resources.append(PopularTable( database=table_info['entity'], cluster=res['cluster'], schema=res['db'], name=res['table'])) return resources def _get_resources_owned_by_user(self, user_id: str, resource_type: str) \ -> List[Union[PopularTable, DashboardSummary, Any]]: """ ToDo (Verdan): Dashboard still needs to be implemented. Helper function to get the resource, table, dashboard etc owned by a user. :param user_id: User ID of a user :param resource_type: Type of a resource that returns, could be table, dashboard etc. :return: A list of PopularTable, DashboardSummary or any other resource. """ resources = list() if resource_type == ResourceType.Table.name: type_regex = "(.*)_table$" entity_type = 'Table' # elif resource_type == ResourceType.Dashboard.name: # type_regex = "Dashboard" # entity_type = 'Dashboard' else: LOGGER.exception(f'Resource Type ({resource_type}) is not yet implemented') raise NotImplemented user_entity = self.client.entity.get_entity_by_attribute(type_name=self.USER_TYPE, uniq_attributes=[(self.QN_KEY, user_id)]).entity if not user_entity: LOGGER.exception(f'User ({user_id}) not found in Atlas') raise NotFoundException(f'User {user_id} not found.') resource_guids = set() for item in user_entity[self.REL_ATTRS_KEY].get('owns') or list(): if (item['entityStatus'] == Status.ACTIVE and item['relationshipStatus'] == Status.ACTIVE and re.compile(type_regex).match(item['typeName'])): resource_guids.add(item[self.GUID_KEY]) owned_tables_query = f'{entity_type} where owner like "{user_id.lower()}*" and __state = "ACTIVE"' table_entities = self.client.discovery.dsl_search(owned_tables_query) for table in table_entities.entities or list(): resource_guids.add(table.guid) if resource_guids: resource_guids_chunks = AtlasProxy.split_list_to_chunks(list(resource_guids), 100) for chunk in resource_guids_chunks: entities = self.client.entity.get_entities_by_guids(guids=list(chunk), ignore_relationships=True) if resource_type == ResourceType.Table.name: resources += self._serialize_popular_tables(entities.entities) else: LOGGER.info(f'User ({user_id}) does not own any "{resource_type}"') return resources @staticmethod def split_list_to_chunks(input_list: List[Any], n: int) -> Generator: """Yield successive n-sized chunks from lst.""" for i in range(0, len(input_list), n): yield input_list[i:i + n] def get_dashboard_by_user_relation(self, *, user_email: str, relation_type: UserResourceRel) \ -> Dict[str, List[DashboardSummary]]: pass def get_table_by_user_relation(self, *, user_email: str, relation_type: UserResourceRel) -> Dict[str, Any]: tables = list() if relation_type == UserResourceRel.follow: tables = self._get_resources_followed_by_user(user_id=user_email, resource_type=ResourceType.Table.name) elif relation_type == UserResourceRel.own: 
tables = self._get_resources_owned_by_user(user_id=user_email, resource_type=ResourceType.Table.name) return {'table': tables} def get_frequently_used_tables(self, *, user_email: str) -> Dict[str, List[PopularTable]]: user = self.client.entity.get_entity_by_attribute(type_name=self.USER_TYPE, uniq_attributes=[(self.QN_KEY, user_email)]).entity readers_guids = [] for user_reads in user['relationshipAttributes'].get('entityReads'): entity_status = user_reads['entityStatus'] relationship_status = user_reads['relationshipStatus'] if entity_status == Status.ACTIVE and relationship_status == Status.ACTIVE: readers_guids.append(user_reads['guid']) readers = self.client.entity.get_entities_by_guids(guids=list(readers_guids), ignore_relationships=True) _results = {} for reader in readers.entities or list(): entity_uri = reader.attributes.get(self.ENTITY_URI_KEY) count = reader.attributes.get('count') if count: details = self._extract_info_from_uri(table_uri=entity_uri) _results[count] = dict(cluster=details.get('cluster'), name=details.get('name'), schema=details.get('db'), database=details.get('entity')) sorted_counts = sorted(_results.keys()) results = [] for count in sorted_counts: data: dict = _results.get(count, dict()) table = PopularTable(**data) results.append(table) return {'table': results} def add_resource_relation_by_user(self, *, id: str, user_id: str, relation_type: UserResourceRel, resource_type: ResourceType) -> None: if resource_type is not ResourceType.Table: raise NotImplemented('resource type {} is not supported'.format(resource_type)) entity = self._get_bookmark_entity(entity_uri=id, user_id=user_id) entity.entity[self.ATTRS_KEY][self.BOOKMARK_ACTIVE_KEY] = True entity.update() def delete_resource_relation_by_user(self, *, id: str, user_id: str, relation_type: UserResourceRel, resource_type: ResourceType) -> None: if resource_type is not ResourceType.Table: raise NotImplemented('resource type {} is not supported'.format(resource_type)) entity = self._get_bookmark_entity(entity_uri=id, user_id=user_id) entity.entity[self.ATTRS_KEY][self.BOOKMARK_ACTIVE_KEY] = False entity.update() def _parse_date(self, date: int) -> Optional[int]: try: date_str = str(date) date_trimmed = date_str[:10] assert len(date_trimmed) == 10 return int(date_trimmed) except Exception: return None def _get_readers(self, entity: AtlasEntityWithExtInfo, top: Optional[int] = 15) -> List[Reader]: _readers = entity.get('relationshipAttributes', dict()).get('readers', list()) guids = [_reader.get('guid') for _reader in _readers if _reader.get('entityStatus', 'INACTIVE') == Status.ACTIVE and _reader.get('relationshipStatus', 'INACTIVE') == Status.ACTIVE] if not guids: return [] readers = self.client.entity.get_entities_by_guids(guids=list(guids), ignore_relationships=False) _result = [] for _reader in readers.entities or list(): read_count = _reader.attributes['count'] if read_count >= int(app.config['POPULAR_TABLE_MINIMUM_READER_COUNT']): reader_qn = _reader.relationshipAttributes['user']['displayText'] reader_details = self._get_user_details(reader_qn) reader = Reader(user=User(**reader_details), read_count=read_count) _result.append(reader) result = sorted(_result, key=attrgetter('read_count'), reverse=True)[:top] return result def _get_programmatic_descriptions(self, parameters: dict) -> List[ProgrammaticDescription]: programmatic_descriptions: Dict[str, ProgrammaticDescription] = {} for source, text in parameters.items(): use_parameter = True for regex_filter in 
app.config['PROGRAMMATIC_DESCRIPTIONS_EXCLUDE_FILTERS']: pattern = re.compile(regex_filter) if pattern.match(source): use_parameter = False break if use_parameter: source = re.sub("([a-z])([A-Z])", "\g<1> \g<2>", source).lower() programmatic_descriptions[source] = ProgrammaticDescription(source=source, text=text) result = dict(sorted(programmatic_descriptions.items())) return list(result.values()) def get_dashboard(self, dashboard_uri: str, ) -> DashboardDetailEntity: pass def get_dashboard_description(self, *, id: str) -> Description: pass def put_dashboard_description(self, *, id: str, description: str) -> None: pass def get_resources_using_table(self, *, id: str, resource_type: ResourceType) -> Dict[str, List[DashboardSummary]]: return {} def get_lineage(self, *, id: str, resource_type: ResourceType, direction: str, depth: int) -> Lineage: pass
class AtlasProxy(BaseProxy): """ Atlas Proxy client for the amundsen metadata {ATLAS_API_DOCS} = https://atlas.apache.org/api/v2/ """ TABLE_ENTITY = app.config['ATLAS_TABLE_ENTITY'] DB_ATTRIBUTE = app.config['ATLAS_DB_ATTRIBUTE'] STATISTICS_FORMAT_SPEC = app.config['STATISTICS_FORMAT_SPEC'] BOOKMARK_TYPE = 'Bookmark' USER_TYPE = 'User' READER_TYPE = 'Reader' QN_KEY = 'qualifiedName' BOOKMARK_ACTIVE_KEY = 'active' ENTITY_ACTIVE_STATUS = 'ACTIVE' GUID_KEY = 'guid' ATTRS_KEY = 'attributes' REL_ATTRS_KEY = 'relationshipAttributes' ENTITY_URI_KEY = 'entityUri' _CACHE = CacheManager(**parse_cache_config_options( { 'cache.regions': 'atlas_proxy', 'cache.atlas_proxy.type': 'memory', 'cache.atlas_proxy.expire': _ATLAS_PROXY_CACHE_EXPIRY_SEC })) def __init__(self, *, host: str, port: int, user: str = 'admin', password: str = '', encrypted: bool = False, validate_ssl: bool = False) -> None: """ Initiate the Apache Atlas client with the provided credentials """ protocol = 'https' if encrypted else 'http' self._driver = Atlas(host=host, port=port, username=user, password=password, protocol=protocol, validate_ssl=validate_ssl) def _get_ids_from_basic_search(self, *, params: Dict) -> List[str]: """ FixMe (Verdan): UNUSED. Please remove after implementing atlas proxy Search for the entities based on the params provided as argument. :param params: the dictionary of parameters to be used for the basic search :return: The flat list of GUIDs of entities founds based on the params. """ ids = list() search_results = self._driver.search_basic(**params) for result in search_results: for entity in result.entities: ids.append(entity.guid) return ids def _get_flat_values_from_dsl(self, dsl_param: dict) -> List: """ Makes a DSL query asking for specific attribute, extracts that attribute from result (which is a list of list, and converts that into a flat list. :param dsl_param: A DSL parameter, with SELECT clause :return: A Flat list of specified attributes in SELECT clause """ attributes: List = list() _search_collection = self._driver.search_dsl(**dsl_param) for collection in _search_collection: attributes = collection.flatten_attrs() return attributes def _extract_info_from_uri(self, *, table_uri: str) -> Dict: """ Extracts the table information from table_uri coming from frontend. :param table_uri: :return: Dictionary object, containing following information: entity: Type of entity example: rdbms_table, hive_table etc. cluster: Cluster information db: Database Name name: Table Name """ pattern = re.compile( r""" ^ (?P<entity>.*?) :\/\/ (?P<cluster>.*) \. (?P<db>.*?) \/ (?P<name>.*?) $ """, re.X) result = pattern.match(table_uri) return result.groupdict() if result else dict() def _parse_reader_qn(self, reader_qn: str) -> Dict: """ Parse reader qualifiedName and extract the info :param reader_qn: :return: Dictionary object containing following information: cluster: cluster information db: Database name name: Table name """ pattern = re.compile( r""" ^(?P<db>[^.]*) \. (?P<table>[^.]*) \. (?P<user_id>[^.]*)\.reader \@ (?P<cluster>.*) $ """, re.X) result = pattern.match(reader_qn) return result.groupdict() if result else dict() def _parse_bookmark_qn(self, bookmark_qn: str) -> Dict: """ Parse bookmark qualifiedName and extract the info :param bookmark_qn: Qualified Name of Bookmark entity :return: Dictionary object containing following information: cluster: cluster information db: Database name name: Table name """ pattern = re.compile( r""" ^(?P<db>[^.]*) \. (?P<table>[^.]*) \. (?P<entity_type>[^.]*) \. 
(?P<user_id>[^.]*)\.bookmark \@ (?P<cluster>.*) $ """, re.X) result = pattern.match(bookmark_qn) return result.groupdict() if result else dict() def _get_table_entity(self, *, table_uri: str) -> EntityUniqueAttribute: """ Fetch information from table_uri and then find the appropriate entity The reason, we're not returning the entity_unique_attribute().entity directly is because the entity_unique_attribute() return entity Object that can be used for update purposes, while entity_unique_attribute().entity only returns the dictionary :param table_uri: :return: A tuple of Table entity and parsed information of table qualified name """ table_info = self._extract_info_from_uri(table_uri=table_uri) table_qn = make_table_qualified_name(table_info.get('name'), table_info.get('cluster'), table_info.get('db')) try: return self._driver.entity_unique_attribute(table_info['entity'], qualifiedName=table_qn) except Exception as ex: LOGGER.exception(f'Table not found. {str(ex)}') raise NotFoundException( 'Table URI( {table_uri} ) does not exist'.format( table_uri=table_uri)) def _get_user_entity(self, user_id: str) -> EntityUniqueAttribute: """ Fetches an user entity from an id :param user_id: :return: """ try: return self._driver.entity_unique_attribute("User", qualifiedName=user_id) except Exception as ex: raise NotFoundException( '(User {user_id}) does not exist'.format(user_id=user_id)) def _create_bookmark(self, entity: EntityUniqueAttribute, user_guid: str, bookmark_qn: str, table_uri: str) -> None: """ Creates a bookmark entity for a specific user and table uri. :param user_guid: User's guid :param bookmark_qn: Bookmark qualifiedName :return: """ bookmark_entity = { 'entity': { 'typeName': self.BOOKMARK_TYPE, 'attributes': { 'qualifiedName': bookmark_qn, self.BOOKMARK_ACTIVE_KEY: True, 'entityUri': table_uri, 'user': { 'guid': user_guid }, 'entity': { 'guid': entity.entity[self.GUID_KEY] } } } } self._driver.entity_post.create(data=bookmark_entity) def _get_bookmark_entity(self, entity_uri: str, user_id: str) -> EntityUniqueAttribute: """ Fetch a Bookmark entity from parsing table uri and user id. If Bookmark is not present, create one for the user. :param table_uri: :param user_id: Qualified Name of a user :return: """ table_info = self._extract_info_from_uri(table_uri=entity_uri) bookmark_qn = '{}.{}.{}.{}.bookmark@{}'.format( table_info.get('db'), table_info.get('name'), table_info.get('entity'), user_id, table_info.get('cluster')) try: bookmark_entity = self._driver.entity_unique_attribute( self.BOOKMARK_TYPE, qualifiedName=bookmark_qn) if not bookmark_entity.entity: table_entity = self._get_table_entity(table_uri=entity_uri) # Fetch user entity from user_id for relation user_entity = self._get_user_entity(user_id) # Create bookmark entity with the user relation. self._create_bookmark(table_entity, user_entity.entity[self.GUID_KEY], bookmark_qn, entity_uri) # Fetch bookmark entity after creating it. bookmark_entity = self._driver.entity_unique_attribute( self.BOOKMARK_TYPE, qualifiedName=bookmark_qn) return bookmark_entity except Exception as ex: LOGGER.exception(f'Bookmark not found. 
{str(ex)}') raise NotFoundException( 'Bookmark( {bookmark_qn} ) does not exist'.format( bookmark_qn=bookmark_qn)) def _get_column(self, *, table_uri: str, column_name: str) -> Dict: """ Fetch the column information from referredEntities of the table entity :param table_uri: :param column_name: :return: A dictionary containing the column details """ try: table_entity = self._get_table_entity(table_uri=table_uri) columns = table_entity.entity[self.REL_ATTRS_KEY].get('columns') for column in columns or list(): col_details = table_entity.referredEntities[column[ self.GUID_KEY]] if column_name == col_details[self.ATTRS_KEY]['name']: return col_details raise NotFoundException(f'Column not found: {column_name}') except KeyError as ex: LOGGER.exception(f'Column not found: {str(ex)}') raise NotFoundException(f'Column not found: {column_name}') def _serialize_columns(self, *, entity: EntityUniqueAttribute) -> \ Union[List[Column], List]: """ Helper function to fetch the columns from entity and serialize them using Column and Statistics model. :param entity: EntityUniqueAttribute object, along with relationshipAttributes :return: A list of Column objects, if there are any columns available, else an empty list. """ columns = list() for column in entity.entity[self.REL_ATTRS_KEY].get( 'columns') or list(): column_status = column.get('entityStatus', 'inactive').lower() if column_status != 'active': continue col_entity = entity.referredEntities[column[self.GUID_KEY]] col_attrs = col_entity[self.ATTRS_KEY] statistics = list() for stats in col_attrs.get('statistics') or list(): stats_attrs = stats['attributes'] stat_type = stats_attrs.get('stat_name') stat_format = self.STATISTICS_FORMAT_SPEC.get( stat_type, dict()) if not stat_format.get('drop', False): stat_type = stat_format.get('new_name', stat_type) stat_val = stats_attrs.get('stat_val') format_val = stat_format.get('format') if format_val: stat_val = format_val.format(stat_val) else: stat_val = str(stat_val) start_epoch = stats_attrs.get('start_epoch') end_epoch = stats_attrs.get('end_epoch') statistics.append( Statistics( stat_type=stat_type, stat_val=stat_val, start_epoch=start_epoch, end_epoch=end_epoch, )) columns.append( Column( name=col_attrs.get('name'), description=col_attrs.get('description') or col_attrs.get('comment'), col_type=col_attrs.get('type') or col_attrs.get('dataType'), sort_order=col_attrs.get('position') or 9999, stats=statistics, )) return sorted(columns, key=lambda item: item.sort_order) def _get_reports(self, guids: List[str]) -> List[ResourceReport]: reports = [] if guids: report_entities_collection = self._driver.entity_bulk(guid=guids) for report_entity in extract_entities(report_entities_collection): try: if report_entity.status == self.ENTITY_ACTIVE_STATUS: report_attrs = report_entity.attributes reports.append( ResourceReport(name=report_attrs['name'], url=report_attrs['url'])) except (KeyError, AttributeError) as ex: LOGGER.exception( 'Error while accessing table report: {}. {}'.format( str(report_entity), str(ex))) parsed_reports = app.config['RESOURCE_REPORT_CLIENT'](reports) \ if app.config['RESOURCE_REPORT_CLIENT'] else reports return parsed_reports def get_user(self, *, id: str) -> Union[UserEntity, None]: pass def get_users(self) -> List[UserEntity]: pass def get_table(self, *, table_uri: str) -> Table: """ Gathers all the information needed for the Table Detail Page. :param table_uri: :return: A Table object with all the information available or gathered from different entities. 
""" entity = self._get_table_entity(table_uri=table_uri) table_details = entity.entity try: attrs = table_details[self.ATTRS_KEY] programmatic_descriptions = self._get_programmatic_descriptions( attrs.get('parameters')) table_qn = parse_table_qualified_name( qualified_name=attrs.get(self.QN_KEY)) tags = [] # Using or in case, if the key 'classifications' is there with a None for classification in table_details.get( "classifications") or list(): tags.append( Tag(tag_name=classification.get('typeName'), tag_type="default")) columns = self._serialize_columns(entity=entity) reports_guids = [ report.get("guid") for report in attrs.get("reports") or list() ] table = Table( database=table_details.get('typeName'), cluster=table_qn.get('cluster_name', ''), schema=table_qn.get('db_name', ''), name=attrs.get('name') or table_qn.get("table_name", ''), tags=tags, description=attrs.get('description') or attrs.get('comment'), owners=[User(email=attrs.get('owner'))], resource_reports=self._get_reports(guids=reports_guids), columns=columns, table_readers=self._get_readers(attrs.get(self.QN_KEY)), last_updated_timestamp=self._parse_date( table_details.get('updateTime')), programmatic_descriptions=programmatic_descriptions) return table except KeyError as ex: LOGGER.exception( 'Error while accessing table information. {}'.format(str(ex))) raise BadRequest( 'Some of the required attributes ' 'are missing in : ( {table_uri} )'.format(table_uri=table_uri)) def delete_owner(self, *, table_uri: str, owner: str) -> None: pass def add_owner(self, *, table_uri: str, owner: str) -> None: """ It simply replaces the owner field in atlas with the new string. FixMe (Verdan): Implement multiple data owners and atlas changes in the documentation if needed to make owner field a list :param table_uri: :param owner: Email address of the owner :return: None, as it simply adds the owner. """ entity = self._get_table_entity(table_uri=table_uri) entity.entity[self.ATTRS_KEY]['owner'] = owner entity.update() def get_table_description(self, *, table_uri: str) -> Union[str, None]: """ :param table_uri: :return: The description of the table as a string """ entity = self._get_table_entity(table_uri=table_uri) return entity.entity[self.ATTRS_KEY].get('description') def put_table_description(self, *, table_uri: str, description: str) -> None: """ Update the description of the given table. 
:param table_uri: :param description: Description string :return: None """ entity = self._get_table_entity(table_uri=table_uri) entity.entity[self.ATTRS_KEY]['description'] = description entity.update() def add_tag(self, *, id: str, tag: str, tag_type: str, resource_type: ResourceType = ResourceType.Table) -> None: """ Assign the tag/classification to the give table API Ref: /resource_EntityREST.html#resource_EntityREST_addClassification_POST :param table_uri: :param tag: Tag/Classification Name :param tag_type :return: None """ entity = self._get_table_entity(table_uri=id) entity_bulk_tag = { "classification": { "typeName": tag }, "entityGuids": [entity.entity[self.GUID_KEY]] } self._driver.entity_bulk_classification.create(data=entity_bulk_tag) def delete_tag(self, *, id: str, tag: str, tag_type: str, resource_type: ResourceType = ResourceType.Table) -> None: """ Delete the assigned classfication/tag from the given table API Ref: /resource_EntityREST.html#resource_EntityREST_deleteClassification_DELETE :param table_uri: :param tag: :return: """ try: entity = self._get_table_entity(table_uri=id) guid_entity = self._driver.entity_guid( entity.entity[self.GUID_KEY]) guid_entity.classifications(tag).delete() except Exception as ex: # FixMe (Verdan): Too broad exception. Please make it specific LOGGER.exception('For some reason this deletes the classification ' 'but also always return exception. {}'.format( str(ex))) def put_column_description(self, *, table_uri: str, column_name: str, description: str) -> None: """ :param table_uri: :param column_name: Name of the column to update the description :param description: The description string :return: None, as it simply updates the description of a column """ column_detail = self._get_column(table_uri=table_uri, column_name=column_name) col_guid = column_detail[self.GUID_KEY] entity = self._driver.entity_guid(col_guid) entity.entity[self.ATTRS_KEY]['description'] = description entity.update(attribute='description') def get_column_description(self, *, table_uri: str, column_name: str) -> Union[str, None]: """ :param table_uri: :param column_name: :return: The column description using the referredEntities information of a table entity """ column_detail = self._get_column(table_uri=table_uri, column_name=column_name) return column_detail[self.ATTRS_KEY].get('description') def get_popular_tables(self, *, num_entries: int) -> List[PopularTable]: """ :param num_entries: Number of popular tables to fetch :return: A List of popular tables instances """ popular_tables = list() popular_query_params = { 'typeName': 'Table', 'sortBy': 'popularityScore', 'sortOrder': 'DESCENDING', 'excludeDeletedEntities': True, 'limit': num_entries } search_results = self._driver.search_basic.create( data=popular_query_params) for table in search_results.entities: table_attrs = table.attributes table_qn = parse_table_qualified_name( qualified_name=table_attrs.get(self.QN_KEY)) table_name = table_qn.get("table_name") or table_attrs.get('name') db_name = table_qn.get("db_name", '') db_cluster = table_qn.get("cluster_name", '') popular_table = PopularTable( database=table.typeName, cluster=db_cluster, schema=db_name, name=table_name, description=table_attrs.get('description') or table_attrs.get('comment')) popular_tables.append(popular_table) return popular_tables def get_latest_updated_ts(self) -> int: pass def get_tags(self) -> List: """ Fetch all the classification entity definitions from atlas as this will be used to generate the autocomplete on the table detail page 
:return: A list of TagDetail Objects """ tags = [] for metrics in self._driver.admin_metrics: tag_stats = metrics.tag for tag, count in tag_stats["tagEntities"].items(): tags.append(TagDetail(tag_name=tag, tag_count=count)) return tags def get_dashboard_by_user_relation(self, *, user_email: str, relation_type: UserResourceRel) \ -> Dict[str, List[DashboardSummary]]: pass def get_table_by_user_relation( self, *, user_email: str, relation_type: UserResourceRel) -> Dict[str, Any]: params = { 'typeName': self.BOOKMARK_TYPE, 'offset': '0', 'limit': '1000', 'excludeDeletedEntities': True, 'entityFilters': { 'condition': 'AND', 'criterion': [{ 'attributeName': self.QN_KEY, 'operator': 'contains', 'attributeValue': f'.{user_email}.bookmark' }, { 'attributeName': self.BOOKMARK_ACTIVE_KEY, 'operator': 'eq', 'attributeValue': 'true' }] }, 'attributes': ['count', self.QN_KEY, self.ENTITY_URI_KEY] } # Fetches the bookmark entities based on filters search_results = self._driver.search_basic.create(data=params) results = [] for record in search_results.entities: table_info = self._extract_info_from_uri( table_uri=record.attributes[self.ENTITY_URI_KEY]) res = self._parse_bookmark_qn(record.attributes[self.QN_KEY]) results.append( PopularTable(database=table_info['entity'], cluster=res['cluster'], schema=res['db'], name=res['table'])) return {'table': results} def get_frequently_used_tables( self, *, user_email: str) -> Dict[str, List[PopularTable]]: user = self._driver.entity_unique_attribute( self.USER_TYPE, qualifiedName=user_email).entity readers_guids = [] for user_reads in user['relationshipAttributes'].get('entityReads'): entity_status = user_reads['entityStatus'] relationship_status = user_reads['relationshipStatus'] if entity_status == 'ACTIVE' and relationship_status == 'ACTIVE': readers_guids.append(user_reads['guid']) readers = extract_entities( self._driver.entity_bulk(guid=readers_guids, ignoreRelationships=True)) _results = {} for reader in readers: entity_uri = reader.attributes.get(self.ENTITY_URI_KEY) count = reader.attributes.get('count') if count: details = self._extract_info_from_uri(table_uri=entity_uri) _results[count] = dict(cluster=details.get('cluster'), name=details.get('name'), schema=details.get('db'), database=details.get('entity')) sorted_counts = sorted(_results.keys()) results = [] for count in sorted_counts: data: dict = _results.get(count, dict()) table = PopularTable(**data) results.append(table) return {'table': results} def add_resource_relation_by_user(self, *, id: str, user_id: str, relation_type: UserResourceRel, resource_type: ResourceType) -> None: if resource_type is not ResourceType.Table: raise NotImplemented( 'resource type {} is not supported'.format(resource_type)) self._add_table_relation_by_user(table_uri=id, user_email=user_id, relation_type=relation_type) def _add_table_relation_by_user(self, *, table_uri: str, user_email: str, relation_type: UserResourceRel) -> None: entity = self._get_bookmark_entity(entity_uri=table_uri, user_id=user_email) entity.entity[self.ATTRS_KEY][self.BOOKMARK_ACTIVE_KEY] = True entity.update() def delete_resource_relation_by_user(self, *, id: str, user_id: str, relation_type: UserResourceRel, resource_type: ResourceType) -> None: if resource_type is not ResourceType.Table: raise NotImplemented( 'resource type {} is not supported'.format(resource_type)) self._delete_table_relation_by_user(table_uri=id, user_email=user_id, relation_type=relation_type) def _delete_table_relation_by_user(self, *, table_uri: str, user_email: str, 
relation_type: UserResourceRel) -> None: entity = self._get_bookmark_entity(entity_uri=table_uri, user_id=user_email) entity.entity[self.ATTRS_KEY][self.BOOKMARK_ACTIVE_KEY] = False entity.update() def _parse_date(self, date: int) -> Optional[int]: try: date_str = str(date) date_trimmed = date_str[:10] assert len(date_trimmed) == 10 return int(date_trimmed) except Exception: return None def _get_readers(self, qualified_name: str, top: Optional[int] = 15) -> List[Reader]: params = { 'typeName': self.READER_TYPE, 'offset': '0', 'limit': top, 'excludeDeletedEntities': True, 'entityFilters': { 'condition': 'AND', 'criterion': [{ 'attributeName': self.QN_KEY, 'operator': 'STARTSWITH', 'attributeValue': qualified_name.split('@')[0] + '.' }, { 'attributeName': 'count', 'operator': 'gte', 'attributeValue': f'{app.config["POPULAR_TABLE_MINIMUM_READER_COUNT"]}' }] }, 'attributes': ['count', self.QN_KEY], 'sortBy': 'count', 'sortOrder': 'DESCENDING' } search_results = self._driver.search_basic.create( data=params, ignoreRelationships=False) readers = [] for record in search_results.entities: readers.append(record.guid) results = [] if readers: read_entities = extract_entities( self._driver.entity_bulk(guid=readers, ignoreRelationships=False)) for read_entity in read_entities: reader = Reader(user=User( email=read_entity.relationshipAttributes['user'] ['displayText'], user_id=read_entity.relationshipAttributes['user'] ['displayText']), read_count=read_entity.attributes['count']) results.append(reader) return results def _get_programmatic_descriptions( self, parameters: dict) -> List[ProgrammaticDescription]: programmatic_descriptions: Dict[str, ProgrammaticDescription] = {} for source, text in parameters.items(): use_parameter = True for regex_filter in app.config[ 'PROGRAMMATIC_DESCRIPTIONS_EXCLUDE_FILTERS']: pattern = re.compile(regex_filter) if pattern.match(source): use_parameter = False break if use_parameter: source = re.sub("([a-z])([A-Z])", "\g<1> \g<2>", source).lower() programmatic_descriptions[source] = ProgrammaticDescription( source=source, text=text) result = dict(sorted(programmatic_descriptions.items())) return list(result.values()) def get_dashboard( self, dashboard_uri: str, ) -> DashboardDetailEntity: pass def get_dashboard_description(self, *, id: str) -> Description: pass def put_dashboard_description(self, *, id: str, description: str) -> None: pass def get_resources_using_table( self, *, id: str, resource_type: ResourceType) -> Dict[str, List[DashboardSummary]]: return {}
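# The table_uri format that _extract_info_from_uri() in the AtlasProxy above expects is
# '<entity>://<cluster>.<db>/<name>' (entity being an Atlas type such as hive_table).
# A quick standalone illustration of that parsing, restating the class's regex with a
# made-up URI; the URI value itself is only an example.
import re

_TABLE_URI_PATTERN = re.compile(
    r'^(?P<entity>.*?)://(?P<cluster>.*)\.(?P<db>.*?)/(?P<name>.*?)$')

match = _TABLE_URI_PATTERN.match('hive_table://gold.test_schema/test_table')
print(match.groupdict())
# {'entity': 'hive_table', 'cluster': 'gold', 'db': 'test_schema', 'name': 'test_table'}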
from beaker.cache import CacheManager
from beaker.util import parse_cache_config_options
from oslo_log import log as logging

from deckhand.conf import config
from deckhand.engine import layering

CONF = config.CONF
LOG = logging.getLogger(__name__)

_CACHE_OPTS = {
    'cache.type': 'memory',
    # The 'cache.' prefix is required for parse_cache_config_options to pick the
    # option up; a bare 'expire' key would be silently ignored.
    'cache.expire': CONF.engine.cache_timeout,
}
_CACHE = CacheManager(**parse_cache_config_options(_CACHE_OPTS))
_DOCUMENT_RENDERING_CACHE = _CACHE.get_cache('rendered_documents_cache')


def lookup_by_revision_id(revision_id, documents, **kwargs):
    """Look up rendered documents by ``revision_id``."""

    def do_render():
        """Perform document rendering for the revision."""
        document_layering = layering.DocumentLayering(documents, **kwargs)
        return document_layering.render()

    if CONF.engine.enable_cache:
        return _DOCUMENT_RENDERING_CACHE.get(key=revision_id,
                                             createfunc=do_render)
    else:
        return do_render()
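# A hedged sketch of invalidation for the rendering cache above. Whether Deckhand
# exposes helpers like these is not shown in the excerpt, but clear() and
# remove_value() are the standard operations on the Beaker cache namespace that
# _DOCUMENT_RENDERING_CACHE already is.
def invalidate():
    """Drop every cached rendering (e.g. after new document revisions land)."""
    _DOCUMENT_RENDERING_CACHE.clear()


def invalidate_one(revision_id):
    """Drop only the cached rendering for a single revision."""
    _DOCUMENT_RENDERING_CACHE.remove_value(key=revision_id)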
import hashlib
import os

import requests
from beaker.cache import CacheManager
from beaker.util import parse_cache_config_options
from scapy.all import *  # noqa: F401,F403  (provides Dot11ProbeReq)

IOT_AGENT_URL = os.getenv('IOT_AGENT_URL', 'http://localhost:7896')
IOT_AGENT_SERVICE_KEY = os.getenv(
    'IOT_AGENT_SERVICE_KEY', '4jggokgpepnvsb2uv4s40d59ov'
)
IOT_AGENT_DEVICE = os.getenv('IOT_AGENT_DEVICE', 'sniffer001')


def send_measure(device, source, identifier, instant):
    URL = '%s/iot/d?k=%s&i=%s' % (IOT_AGENT_URL, IOT_AGENT_SERVICE_KEY, device)
    data = 'm|%s %s %s' % (source, identifier, instant)
    return requests.post(URL, data=data)


# CACHE_OPTIONS is expected to be provided by the surrounding module (not part
# of this excerpt).
cache = CacheManager(**parse_cache_config_options(CACHE_OPTIONS))


def encrypt_string(hash_string):
    sha_signature = hashlib.sha256(hash_string.encode()).hexdigest()
    return sha_signature


def handle_packet(packet):
    if not packet.haslayer(Dot11ProbeReq):  # noqa: F405
        return
    if packet.type != 0 or packet.subtype != 0x04:
        return
    hashed_mac = encrypt_string(packet.addr2)
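# The sniffer excerpt above references CACHE_OPTIONS without defining it and stops
# midway through handle_packet(). The configuration and helper below are a purely
# speculative sketch of how the Beaker cache could deduplicate recently seen hashed
# MACs; every name and value here is an assumption for illustration only.
CACHE_OPTIONS_EXAMPLE = {
    'cache.type': 'memory',
    'cache.expire': 300,  # assumed: forget a probe request after five minutes
}


def seen_recently(hashed_mac):
    """Return True if this hashed MAC is already inside the cache window."""
    namespace = cache.get_cache('probe_requests', expire=300)
    try:
        namespace.get_value(hashed_mac)
        return True
    except KeyError:
        namespace.set_value(hashed_mac, True)
        return False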
class AtlasProxy(BaseProxy): """ Atlas Proxy client for the amundsen metadata {ATLAS_API_DOCS} = https://atlas.apache.org/api/v2/ """ TABLE_ENTITY = app.config['ATLAS_TABLE_ENTITY'] DB_ATTRIBUTE = app.config['ATLAS_DB_ATTRIBUTE'] READER_TYPE = 'Reader' QN_KEY = 'qualifiedName' BKMARKS_KEY = 'isFollowing' METADATA_KEY = 'metadata' GUID_KEY = 'guid' ATTRS_KEY = 'attributes' REL_ATTRS_KEY = 'relationshipAttributes' ENTITY_URI_KEY = 'entityUri' _CACHE = CacheManager(**parse_cache_config_options({'cache.regions': 'atlas_proxy', 'cache.atlas_proxy.type': 'memory', 'cache.atlas_proxy.expire': _ATLAS_PROXY_CACHE_EXPIRY_SEC})) def __init__(self, *, host: str, port: int, user: str = 'admin', password: str = '') -> None: """ Initiate the Apache Atlas client with the provided credentials """ self._driver = Atlas(host=host, port=port, username=user, password=password) def _get_ids_from_basic_search(self, *, params: Dict) -> List[str]: """ FixMe (Verdan): UNUSED. Please remove after implementing atlas proxy Search for the entities based on the params provided as argument. :param params: the dictionary of parameters to be used for the basic search :return: The flat list of GUIDs of entities founds based on the params. """ ids = list() search_results = self._driver.search_basic(**params) for result in search_results: for entity in result.entities: ids.append(entity.guid) return ids def _get_flat_values_from_dsl(self, dsl_param: dict) -> List: """ Makes a DSL query asking for specific attribute, extracts that attribute from result (which is a list of list, and converts that into a flat list. :param dsl_param: A DSL parameter, with SELECT clause :return: A Flat list of specified attributes in SELECT clause """ attributes: List = list() _search_collection = self._driver.search_dsl(**dsl_param) for collection in _search_collection: attributes = collection.flatten_attrs() return attributes def _extract_info_from_uri(self, *, table_uri: str) -> Dict: """ Extracts the table information from table_uri coming from frontend. :param table_uri: :return: Dictionary object, containing following information: entity: Type of entity example: rdbms_table, hive_table etc. cluster: Cluster information db: Database Name name: Table Name """ pattern = re.compile(r""" ^ (?P<entity>.*?) :\/\/ (?P<cluster>.*) \. (?P<db>.*?) \/ (?P<name>.*?) $ """, re.X) result = pattern.match(table_uri) return result.groupdict() if result else dict() def _parse_reader_qn(self, reader_qn: str) -> Dict: """ Parse reader qualifiedName and extract the info :param reader_qn: :return: Dictionary object containing following information: cluster: cluster information db: Database name name: Table name """ pattern = re.compile(r""" ^(?P<db>[^.]*) \. (?P<table>[^.]*)\.metadata \. 
(?P<user_id>[^.]*)\.reader \@ (?P<cluster>.*) $ """, re.X) result = pattern.match(reader_qn) return result.groupdict() if result else dict() def _get_table_entity(self, *, table_uri: str) -> Tuple[EntityUniqueAttribute, Dict]: """ Fetch information from table_uri and then find the appropriate entity The reason, we're not returning the entity_unique_attribute().entity directly is because the entity_unique_attribute() return entity Object that can be used for update purposes, while entity_unique_attribute().entity only returns the dictionary :param table_uri: :return: A tuple of Table entity and parsed information of table qualified name """ table_info = self._extract_info_from_uri(table_uri=table_uri) table_qn = make_table_qualified_name(table_info.get('name'), table_info.get('cluster'), table_info.get('db') ) try: return self._driver.entity_unique_attribute( table_info['entity'], qualifiedName=table_qn), table_info except Exception as ex: LOGGER.exception(f'Table not found. {str(ex)}') raise NotFoundException('Table URI( {table_uri} ) does not exist' .format(table_uri=table_uri)) def _get_user_entity(self, user_id: str) -> EntityUniqueAttribute: """ Fetches an user entity from an id :param user_id: :return: """ try: return self._driver.entity_unique_attribute("User", qualifiedName=user_id) except Exception as ex: raise NotFoundException('(User {user_id}) does not exist' .format(user_id=user_id)) def _create_reader(self, metadata_guid: str, user_guid: str, reader_qn: str, table_uri: str) -> None: """ Creates a reader entity for a specific user and table uri. :param metadata_guid: Table's metadata guid :param user_guid: User's guid :param reader_qn: Reader qualifiedName :return: """ reader_entity = { 'typeName': self.READER_TYPE, 'attributes': {'qualifiedName': reader_qn, 'isFollowing': True, 'count': 0, 'entityMetadata': {'guid': metadata_guid}, 'user': {'guid': user_guid}, 'entityUri': table_uri} } self._driver.entity_bulk.create(data={'entities': [reader_entity]}) def _get_reader_entity(self, table_uri: str, user_id: str) -> EntityUniqueAttribute: """ Fetch a Reader entity from parsing table uri and user id. If Reader is not present, create one for the user. :param table_uri: :param user_id: Qualified Name of a user :return: """ table_info = self._extract_info_from_uri(table_uri=table_uri) reader_qn = '{}.{}.metadata.{}.reader@{}'.format(table_info.get('db'), table_info.get('name'), user_id, table_info.get('cluster')) try: reader_entity = self._driver.entity_unique_attribute( self.READER_TYPE, qualifiedName=reader_qn) if not reader_entity.entity: # Fetch the table entity from the uri for obtaining metadata guid. table_entity, table_info = self._get_table_entity(table_uri=table_uri) # Fetch user entity from user_id for relation user_entity = self._get_user_entity(user_id) # Create reader entity with the metadata and user relation. self._create_reader(table_entity.entity[self.ATTRS_KEY][self.METADATA_KEY][self.GUID_KEY], user_entity.entity[self.GUID_KEY], reader_qn, table_uri) # Fetch reader entity after creating it. reader_entity = self._driver.entity_unique_attribute(self.READER_TYPE, qualifiedName=reader_qn) return reader_entity except Exception as ex: LOGGER.exception(f'Reader not found. 
{str(ex)}') raise NotFoundException('Reader( {reader_qn} ) does not exist' .format(reader_qn=reader_qn)) def _get_column(self, *, table_uri: str, column_name: str) -> Dict: """ Fetch the column information from referredEntities of the table entity :param table_uri: :param column_name: :return: A dictionary containing the column details """ try: table_entity, _ = self._get_table_entity(table_uri=table_uri) columns = table_entity.entity[self.REL_ATTRS_KEY].get('columns') for column in columns or list(): col_details = table_entity.referredEntities[column['guid']] if column_name == col_details[self.ATTRS_KEY]['name']: return col_details raise NotFoundException(f'Column not found: {column_name}') except KeyError as ex: LOGGER.exception(f'Column not found: {str(ex)}') raise NotFoundException(f'Column not found: {column_name}') def _serialize_columns(self, *, entity: EntityUniqueAttribute) -> \ Union[List[Column], List]: """ Helper function to fetch the columns from entity and serialize them using Column and Statistics model. :param entity: EntityUniqueAttribute object, along with relationshipAttributes :return: A list of Column objects, if there are any columns available, else an empty list. """ columns = list() for column in entity.entity[self.REL_ATTRS_KEY].get('columns') or list(): col_entity = entity.referredEntities[column['guid']] col_attrs = col_entity[self.ATTRS_KEY] col_rel_attrs = col_entity[self.REL_ATTRS_KEY] col_metadata = col_rel_attrs.get('metadata') statistics = list() if col_metadata: col_metadata = entity.referredEntities.get(col_metadata.get('guid')) for stats in col_metadata['attributes'].get('statistics') or list(): stats_attrs = stats['attributes'] statistics.append( Statistics( stat_type=stats_attrs.get('stat_name'), stat_val=stats_attrs.get('stat_val'), start_epoch=stats_attrs.get('start_epoch'), end_epoch=stats_attrs.get('end_epoch'), ) ) columns.append( Column( name=col_attrs.get('name'), description=col_attrs.get('description') or col_attrs.get('comment'), col_type=col_attrs.get('type') or col_attrs.get('dataType'), sort_order=col_attrs.get('position'), stats=statistics, ) ) return sorted(columns, key=lambda item: item.sort_order) def get_user_detail(self, *, user_id: str) -> Union[UserEntity, None]: pass def get_table(self, *, table_uri: str) -> Table: """ Gathers all the information needed for the Table Detail Page. :param table_uri: :return: A Table object with all the information available or gathered from different entities. """ entity, table_info = self._get_table_entity(table_uri=table_uri) table_details = entity.entity try: attrs = table_details[self.ATTRS_KEY] table_qn = parse_table_qualified_name( qualified_name=attrs.get(self.QN_KEY) ) tags = [] # Using or in case, if the key 'classifications' is there with a None for classification in table_details.get("classifications") or list(): tags.append( Tag( tag_name=classification.get('typeName'), tag_type="default" ) ) columns = self._serialize_columns(entity=entity) table = Table( database=table_details.get('typeName'), cluster=table_qn.get('cluster_name', ''), schema=table_qn.get('db_name', ''), name=attrs.get('name') or table_qn.get("table_name", ''), tags=tags, description=attrs.get('description') or attrs.get('comment'), owners=[User(email=attrs.get('owner'))], columns=columns, last_updated_timestamp=table_details.get('updateTime')) return table except KeyError as ex: LOGGER.exception('Error while accessing table information. 
{}' .format(str(ex))) raise BadRequest('Some of the required attributes ' 'are missing in : ( {table_uri} )' .format(table_uri=table_uri)) def delete_owner(self, *, table_uri: str, owner: str) -> None: pass def add_owner(self, *, table_uri: str, owner: str) -> None: """ It simply replaces the owner field in atlas with the new string. FixMe (Verdan): Implement multiple data owners and atlas changes in the documentation if needed to make owner field a list :param table_uri: :param owner: Email address of the owner :return: None, as it simply adds the owner. """ entity, _ = self._get_table_entity(table_uri=table_uri) entity.entity[self.ATTRS_KEY]['owner'] = owner entity.update() def get_table_description(self, *, table_uri: str) -> Union[str, None]: """ :param table_uri: :return: The description of the table as a string """ entity, _ = self._get_table_entity(table_uri=table_uri) return entity.entity[self.ATTRS_KEY].get('description') def put_table_description(self, *, table_uri: str, description: str) -> None: """ Update the description of the given table. :param table_uri: :param description: Description string :return: None """ entity, _ = self._get_table_entity(table_uri=table_uri) entity.entity[self.ATTRS_KEY]['description'] = description entity.update() def add_tag(self, *, table_uri: str, tag: str, tag_type: str) -> None: """ Assign the tag/classification to the give table API Ref: /resource_EntityREST.html#resource_EntityREST_addClassification_POST :param table_uri: :param tag: Tag/Classification Name :param tag_type :return: None """ entity, _ = self._get_table_entity(table_uri=table_uri) entity_bulk_tag = {"classification": {"typeName": tag}, "entityGuids": [entity.entity['guid']]} self._driver.entity_bulk_classification.create(data=entity_bulk_tag) def delete_tag(self, *, table_uri: str, tag: str, tag_type: str) -> None: """ Delete the assigned classfication/tag from the given table API Ref: /resource_EntityREST.html#resource_EntityREST_deleteClassification_DELETE :param table_uri: :param tag: :return: """ try: entity, _ = self._get_table_entity(table_uri=table_uri) guid_entity = self._driver.entity_guid(entity.entity['guid']) guid_entity.classifications(tag).delete() except Exception as ex: # FixMe (Verdan): Too broad exception. Please make it specific LOGGER.exception('For some reason this deletes the classification ' 'but also always return exception. 
{}'.format(str(ex))) def put_column_description(self, *, table_uri: str, column_name: str, description: str) -> None: """ :param table_uri: :param column_name: Name of the column to update the description :param description: The description string :return: None, as it simply updates the description of a column """ column_detail = self._get_column( table_uri=table_uri, column_name=column_name) col_guid = column_detail['guid'] entity = self._driver.entity_guid(col_guid) entity.entity[self.ATTRS_KEY]['description'] = description entity.update(attribute='description') def get_column_description(self, *, table_uri: str, column_name: str) -> Union[str, None]: """ :param table_uri: :param column_name: :return: The column description using the referredEntities information of a table entity """ column_detail = self._get_column( table_uri=table_uri, column_name=column_name) return column_detail[self.ATTRS_KEY].get('description') @_CACHE.region('atlas_proxy', '_get_metadata_entities') def _get_metadata_entities(self, popular_query_params: dict) -> List: try: popular_tables_guids = list() # Fetch the metadata entities based on popularity score search_results = self._driver.search_basic.create(data=popular_query_params) for metadata in search_results.entities: table_guid = metadata.attributes.get("table").get("guid") popular_tables_guids.append(table_guid) # In order to get comments and other extra fields from table entity table_collection = self._driver.entity_bulk(guid=popular_tables_guids, ignoreRelationships=True) table_entities: List = list() for _collection in table_collection: table_entities.extend(_collection.entities) return table_entities except (KeyError, TypeError) as ex: LOGGER.exception(f'_get_metadata_entities Failed : {ex}') raise NotFoundException('Unable to fetch popular tables. 
' 'Please check your configurations.') def get_popular_tables(self, *, num_entries: int) -> List[PopularTable]: """ :param num_entries: Number of popular tables to fetch :return: A List of popular tables instances """ popular_tables = list() popular_query_params = {'typeName': 'table_metadata', 'sortBy': 'popularityScore', 'sortOrder': 'DESCENDING', 'excludeDeletedEntities': True, 'limit': num_entries, 'attributes': ['table']} table_entities = self._get_metadata_entities(popular_query_params) for table in table_entities: table_attrs = table.attributes table_qn = parse_table_qualified_name( qualified_name=table_attrs.get(self.QN_KEY) ) table_name = table_qn.get("table_name") or table_attrs.get('name') db_name = table_qn.get("db_name", '') db_cluster = table_qn.get("cluster_name", '') popular_table = PopularTable( database=table.typeName, cluster=db_cluster, schema=db_name, name=table_name, description=table_attrs.get('description') or table_attrs.get('comment')) popular_tables.append(popular_table) return popular_tables def get_latest_updated_ts(self) -> int: pass def get_tags(self) -> List: """ Fetch all the classification entity definitions from atlas as this will be used to generate the autocomplete on the table detail page :return: A list of TagDetail Objects """ tags = [] for metrics in self._driver.admin_metrics: tag_stats = metrics.tag for tag, count in tag_stats["tagEntities"].items(): tags.append( TagDetail( tag_name=tag, tag_count=count ) ) return tags def get_table_by_user_relation(self, *, user_email: str, relation_type: UserResourceRel) -> Dict[str, Any]: params = { 'typeName': self.READER_TYPE, 'offset': '0', 'limit': '1000', 'entityFilters': { 'condition': 'AND', 'criterion': [ { 'attributeName': self.QN_KEY, 'operator': 'contains', 'attributeValue': user_email }, { 'attributeName': self.BKMARKS_KEY, 'operator': 'eq', 'attributeValue': 'true' } ] }, 'attributes': ['count', self.QN_KEY, self.ENTITY_URI_KEY] } # Fetches the reader entities based on filters search_results = self._driver.search_basic.create(data=params) results = [] for record in search_results.entities: table_info = self._extract_info_from_uri(table_uri=record.attributes[self.ENTITY_URI_KEY]) res = self._parse_reader_qn(record.attributes[self.QN_KEY]) results.append(PopularTable( database=table_info['entity'], cluster=res['cluster'], schema=res['db'], name=res['table'])) return {'table': results} def get_frequently_used_tables(self, *, user_email: str) -> Dict[str, Any]: pass def add_table_relation_by_user(self, *, table_uri: str, user_email: str, relation_type: UserResourceRel) -> None: entity = self._get_reader_entity(table_uri=table_uri, user_id=user_email) entity.entity[self.ATTRS_KEY][self.BKMARKS_KEY] = True entity.update() def delete_table_relation_by_user(self, *, table_uri: str, user_email: str, relation_type: UserResourceRel) -> None: entity = self._get_reader_entity(table_uri=table_uri, user_id=user_email) entity.entity[self.ATTRS_KEY][self.BKMARKS_KEY] = False entity.update()
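# Sketch of invalidating a region-cached function such as the decorated
# _get_metadata_entities in the proxy above. The function below is a stand-in, not
# Amundsen code; the call form mirrors Beaker's documented
# region_invalidate(decorated_func, region, *key_args) usage.
from beaker.cache import CacheManager
from beaker.util import parse_cache_config_options

cache = CacheManager(**parse_cache_config_options({
    'cache.regions': 'atlas_proxy',
    'cache.atlas_proxy.type': 'memory',
    'cache.atlas_proxy.expire': 300,  # stand-in for _ATLAS_PROXY_CACHE_EXPIRY_SEC
}))


@cache.region('atlas_proxy', 'metadata_entities')
def get_metadata_entities(type_name):
    # Stand-in for the expensive search_basic/entity_bulk round trips.
    return ['{}-entity'.format(type_name)]


get_metadata_entities('table_metadata')        # computed and cached
cache.region_invalidate(get_metadata_entities, None,
                        'metadata_entities', 'table_metadata')
get_metadata_entities('table_metadata')        # recomputed after invalidation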
import functools
import logging

from beaker.cache import CacheManager
from beaker.util import parse_cache_config_options

from event_processor.util.switchable_decorator import SwitchableDecorator
from event_processor.config import config

cache = CacheManager(
    **parse_cache_config_options({
        'cache.type': 'file',
        'cache.data_dir': '/tmp/beaker/data',
        'cache.lock_dir': '/tmp/beaker/lock'
    }))


def try_cache(target):
    """Attempt to serve the call from the 'web_call' cache, falling back to a
    direct call to ``target`` if the cache raises."""

    @functools.wraps(target)
    def try_call(*args, **kwargs):
        try:
            return cache.cache(
                'web_call', expire=config.api_cache_expiration)(target)(
                    *args, **kwargs)
        except Exception as e:
            logging.getLogger('scrapy').warning(
                'Exception while calling cache: ' + str(e))
            return target(*args, **kwargs)

    return try_call


cache_call = SwitchableDecorator(try_cache, config.enable_api_cache)
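# Hypothetical consumer of cache_call from the module above. This assumes
# SwitchableDecorator applies try_cache only when config.enable_api_cache is
# truthy and otherwise returns the function unchanged; fetch_agency itself is
# made up for the sketch.
@cache_call
def fetch_agency(agency_id):
    # Imagine an HTTP request here; repeated calls with the same id are served
    # from the file-backed 'web_call' cache while the entry is still fresh.
    return {'id': agency_id}


first = fetch_agency(42)   # computed (and cached, when caching is enabled)
second = fetch_agency(42)  # cache hit within api_cache_expiration seconds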
# coding: utf-8
"""If we try to use a character not in ascii range as a cache key, we get an
unicodeencode error. See
https://bitbucket.org/bbangert/beaker/issue/31/cached-function-decorators-break-when-some
for more on this
"""
from nose.tools import *

from beaker.cache import CacheManager

memory_cache = CacheManager(type='memory')


@memory_cache.cache('foo')
def foo(whatever):
    return whatever


class bar(object):
    @memory_cache.cache('baz')
    def baz(self, qux):
        return qux

    @classmethod
    @memory_cache.cache('bar')
    def quux(cls, garply):
        return garply


def test_A_unicode_encode_key_str():
    eq_(foo('Espanol'), 'Espanol')
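# The excerpt stops at the plain ASCII case. The regression described in the module
# docstring concerns non-ASCII keys, so follow-up assertions along these lines would
# exercise it (the exact tests in Beaker's own suite may differ from this sketch):
def test_B_unicode_encode_key_unicode():
    eq_(foo(u'Español'), u'Español')


def test_C_unicode_encode_key_methods():
    b = bar()
    eq_(b.baz(u'Español'), u'Español')
    eq_(bar.quux(u'Español'), u'Español')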