def main(global_config, **settings):
    """Application factory"""
    config = Configurator(settings=settings, root_factory=RootFactory)
    declare_routes(config)

    session_factory = SignedCookieSessionFactory(
        settings.get('session_key', 'itsaseekreet'))
    config.set_session_factory(session_factory)

    global cache
    cache = CacheManager(**parse_cache_config_options(settings))

    from .authnz import APIKeyAuthenticationPolicy
    api_key_authn_policy = APIKeyAuthenticationPolicy()
    config.include('openstax_accounts')
    openstax_authn_policy = config.registry.getUtility(
        IOpenstaxAccountsAuthenticationPolicy)

    policies = [api_key_authn_policy, openstax_authn_policy]
    authn_policy = MultiAuthenticationPolicy(policies)
    config.set_authentication_policy(authn_policy)
    authz_policy = ACLAuthorizationPolicy()
    config.set_authorization_policy(authz_policy)

    config.scan(ignore='cnxpublishing.tests')
    return config.make_wsgi_app()

def __init__(self, config):
    """One instance of Globals is created during application
    initialization and is available during requests via the
    'app_globals' variable
    """
    self.cache = CacheManager(**parse_cache_config_options(config))

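# For reference, a minimal sketch of the Pylons-style settings dict that
# parse_cache_config_options consumes in the Globals pattern above. The
# keys are standard beaker options; the values are illustrative only.
example_config = {
    'cache.type': 'file',
    'cache.data_dir': '/tmp/app/cache/data',
    'cache.lock_dir': '/tmp/app/cache/lock',
    'cache.regions': 'short_term, long_term',
    'cache.short_term.expire': '60',
    'cache.long_term.expire': '3600',
}
example_cache = CacheManager(**parse_cache_config_options(example_config))
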
def setUp(self):
    self.reg = Registry()
    self.__temp_dir = tempfile.TemporaryDirectory()
    self.__work_zone_dir = os.path.join(self.__temp_dir.name, 'work_zone')
    self.__raw_data_base_dir = os.path.join(self.__temp_dir.name, 'raw_data')
    self.__item_level_base_dir = os.path.join(self.__temp_dir.name, 'item_level')
    self.reg.settings = {'extract.work_zone_base_dir': self.__work_zone_dir,
                         'hpz.file_upload_base_url': 'http://somehost:82/files',
                         'extract.available_grades': '3,4,5,6,7,8,11',
                         'extract.raw_data_base_dir': self.__raw_data_base_dir,
                         'extract.item_level_base_dir': self.__item_level_base_dir}
    settings = {'extract.celery.CELERY_ALWAYS_EAGER': True}
    setup_celery(settings)
    cache_opts = {
        'cache.type': 'memory',
        'cache.regions': 'public.data,public.filtered_data,public.shortlived'
    }
    CacheManager(**parse_cache_config_options(cache_opts))
    # Set up user context
    self.__request = DummyRequest()
    # Must set hook_zca to false to work with unittest_with_sqlite
    self.__config = testing.setUp(registry=self.reg, request=self.__request,
                                  hook_zca=False)
    defined_roles = [(Allow, RolesConstants.SAR_EXTRACTS, ('view', 'logout')),
                     (Allow, RolesConstants.AUDIT_XML_EXTRACTS, ('view', 'logout')),
                     (Allow, RolesConstants.ITEM_LEVEL_EXTRACTS, ('view', 'logout'))]
    edauth.set_roles(defined_roles)
    dummy_session = create_test_session([RolesConstants.SAR_EXTRACTS,
                                         RolesConstants.AUDIT_XML_EXTRACTS,
                                         RolesConstants.ITEM_LEVEL_EXTRACTS])
    self.__config.testing_securitypolicy(dummy_session.get_user())
    set_tenant_map({get_unittest_tenant_name(): 'NC'})

def __init__(self, config):
    """One instance of Globals is created during application
    initialization and is available during requests via the
    'app_globals' variable
    """
    self.cache = CacheManager(**parse_cache_config_options(config))
    if "htpasswd.file" in config:
        self.passwdfile = HtpasswdFile(config['htpasswd.file'])
        self.passwdfile.load()
    if "mediators.dir" in config:
        self.mediatorsdir = config['mediators.dir']
    if "mediators.list" in config:
        self.mediatorslist = config['mediators.list']
    if "vocabularies.dir" in config:
        self.vocabulariesdir = config['vocabularies.dir']
    if "vocabularies.ref" in config:
        self.vocabulariesref = config['vocabularies.ref']
    if "ext_vocabularies.dir" in config:
        self.extvocabulariesdir = config['ext_vocabularies.dir']
    if "svn.username" in config:
        self.svnusername = config['svn.username']
    if "svn.password" in config:
        self.svnpassword = config['svn.password']
    if "conversion_template" in config:
        self.conversion_template = config['conversion_template']

def __init__(self, config):
    """One instance of Globals is created during application
    initialization and is available during requests via the
    'app_globals' variable
    """
    self.cache = CacheManager(**parse_cache_config_options(config))

    cache_spec = config.get('buildapi.cache')
    tz_name = config.get('timezone')
    tz = pytz.timezone(tz_name)
    self.tz = tz

    self.masters_url = config['masters_url']
    self.branches_url = config['branches_url']

    if hasattr(cacher, 'RedisCache') and cache_spec.startswith('redis:'):
        # TODO: handle other hosts/ports
        bits = cache_spec.split(':')
        kwargs = {}
        if len(bits) >= 2:
            kwargs['host'] = bits[1]
        if len(bits) == 3:
            kwargs['port'] = int(bits[2])
        buildapi_cacher = cacher.RedisCache(**kwargs)
    elif hasattr(cacher, 'MemcacheCache') and cache_spec.startswith('memcached:'):
        hosts = cache_spec[10:].split(',')
        buildapi_cacher = cacher.MemcacheCache(hosts)
    else:
        raise RuntimeError("invalid cache spec %r" % (cache_spec,))
    self.buildapi_cache = cache.BuildapiCache(buildapi_cacher, tz)

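# A standalone sketch making the cache_spec formats accepted above explicit;
# parse_cache_spec is local to this example and not part of the original
# module, but the parsing logic mirrors the branch above.
def parse_cache_spec(spec):
    if spec.startswith('redis:'):
        bits = spec.split(':')
        kwargs = {}
        if len(bits) >= 2:
            kwargs['host'] = bits[1]
        if len(bits) == 3:
            kwargs['port'] = int(bits[2])
        return 'redis', kwargs
    if spec.startswith('memcached:'):
        return 'memcached', spec[len('memcached:'):].split(',')
    raise RuntimeError("invalid cache spec %r" % (spec,))

assert parse_cache_spec('redis:localhost:6379') == ('redis', {'host': 'localhost', 'port': 6379})
assert parse_cache_spec('memcached:mc1,mc2') == ('memcached', ['mc1', 'mc2'])
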
def setUp(self):
    cache_opts = {
        'cache.type': 'memory',
        'cache.regions': 'public.data,public.filtered_data,public.shortlived'
    }
    CacheManager(**parse_cache_config_options(cache_opts))
    self.__request = DummyRequest()
    # Must set hook_zca to false to work with unittest_with_sqlite
    reg = Registry()
    reg.settings = {'extract.available_grades': '3,4,5,6,7,8,9,11',
                    'hpz.file_upload_base_url': 'http://somehost:82/files',
                    'extract.raw_data_base_dir': '/opt/edware/raw_data'}
    self.__config = testing.setUp(registry=reg, request=self.__request,
                                  hook_zca=False)
    self.__tenant_name = get_unittest_tenant_name()
    defined_roles = [(Allow, RolesConstants.AUDIT_XML_EXTRACTS, ('view', 'logout'))]
    edauth.set_roles(defined_roles)
    set_tenant_map({get_unittest_tenant_name(): 'NC'})
    # Set up context security
    dummy_session = create_test_session([RolesConstants.AUDIT_XML_EXTRACTS])
    self.__config.testing_securitypolicy(dummy_session.get_user())
    # celery settings for UT
    settings = {'extract.celery.CELERY_ALWAYS_EAGER': True}
    setup_celery(settings)
    # for UT purposes
    smarter.extracts.format.json_column_mapping = {}
    set_tenant_map({'tomcat': 'NC'})

def setUp(self):
    self.reg = Registry()
    self.__work_zone_dir = tempfile.TemporaryDirectory()
    self.reg.settings = {'extract.work_zone_base_dir': '/tmp/work_zone',
                         'pickup.gatekeeper.t1': '/t/acb',
                         'pickup.gatekeeper.t2': '/a/df',
                         'pickup.gatekeeper.y': '/a/c',
                         'pickup.sftp.hostname': 'hostname.local.net',
                         'pickup.sftp.user': '******',
                         'pickup.sftp.private_key_file': '/home/users/myUser/.ssh/id_rsa',
                         'extract.available_grades': '3,4,5,6,7,8,11'}
    cache_opts = {
        'cache.type': 'memory',
        'cache.regions': 'public.data,public.filtered_data,public.shortlived'
    }
    CacheManager(**parse_cache_config_options(cache_opts))
    # Set up user context
    self.__request = DummyRequest()
    # Must set hook_zca to false to work with unittest_with_sqlite
    self.__config = testing.setUp(registry=self.reg, request=self.__request,
                                  hook_zca=False)
    defined_roles = [(Allow, RolesConstants.SAR_EXTRACTS, ('view', 'logout'))]
    edauth.set_roles(defined_roles)
    set_tenant_map({get_unittest_tenant_name(): 'NC'})
    dummy_session = create_test_session([RolesConstants.SAR_EXTRACTS])
    self.__config.testing_securitypolicy(dummy_session.get_user())

def test_client_behaviors(self):
    config = {
        "cache.lock_dir": "./lock",
        "cache.data_dir": "./cache",
        "cache.type": "ext:memcached",
        "cache.url": mc_url,
        "cache.memcache_module": "pylibmc",
        "cache.protocol": "binary",
        "cache.behavior.ketama": "True",
        "cache.behavior.cas": False,
        "cache.behavior.receive_timeout": "3600",
        "cache.behavior.send_timeout": 1800,
        "cache.behavior.tcp_nodelay": 1,
        "cache.behavior.auto_eject": "0",
    }
    cache_manager = CacheManager(**parse_cache_config_options(config))
    cache = cache_manager.get_cache("test_behavior", expire=6000)

    with cache.namespace.pool.reserve() as mc:
        assert "ketama" in mc.behaviors
        assert mc.behaviors["ketama"] == 1
        assert "cas" in mc.behaviors
        assert mc.behaviors["cas"] == 0
        assert "receive_timeout" in mc.behaviors
        assert mc.behaviors["receive_timeout"] == 3600
        assert "send_timeout" in mc.behaviors
        assert mc.behaviors["send_timeout"] == 1800
        assert "tcp_nodelay" in mc.behaviors
        assert mc.behaviors["tcp_nodelay"] == 1
        assert "auto_eject" in mc.behaviors
        assert mc.behaviors["auto_eject"] == 0

def init_cache(self):
    cache_opts = {
        'cache.type': 'file',
        'cache.data_dir': '/tmp/cache/data',
        'cache.lock_dir': '/tmp/cache/lock'}
    cm = CacheManager(**parse_cache_config_options(cache_opts))
    return cm.get_cache('schlagzeile', expire=600)

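# A sketch of how a region like the one returned by init_cache() might be
# consumed: get_value computes and stores the value on a miss, remove_value
# evicts it. fetch_headline is a hypothetical loader, and a memory backend
# is used here so the example runs without the /tmp directories.
from beaker.cache import CacheManager
from beaker.util import parse_cache_config_options

def fetch_headline():
    return 'expensive headline lookup'

_cm = CacheManager(**parse_cache_config_options({'cache.type': 'memory'}))
_headlines = _cm.get_cache('schlagzeile', expire=600)
headline = _headlines.get_value('latest', createfunc=fetch_headline)
_headlines.remove_value('latest')
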
def __init__(self, config):
    """One instance of Globals is created during application
    initialization and is available during requests via the
    'app_globals' variable
    """
    self.cache = CacheManager(**parse_cache_config_options(config))

    # MongoDB configuration
    db_info = {'host': config['graph.database.host'],
               'port': int(config['graph.database.port']),
               'db': config['graph.database.db'],
               'username': config['graph.database.username'],
               'password': config['graph.database.password']}

    # Populate basic app globals
    try:
        conn = Connection(host=db_info['host'], port=db_info['port'])
    except ConnectionFailure:
        raise Exception('Unable to connect to MongoDB')
    self.db = conn[db_info['db']]
    #auth = self.db.authenticate(db_info['username'], db_info['password'])
    #if not auth:
    #    raise Exception('Authentication to MongoDB failed')

def __init__(self, config):
    cache_opts = {
        'cache.type': getattr(config, 'citation_cache_type', 'file'),
        'cache.data_dir': getattr(config, 'citation_cache_data_dir', None),
        'cache.lock_dir': getattr(config, 'citation_cache_lock_dir', None),
    }
    self._cache = CacheManager(**parse_cache_config_options(cache_opts)).get_cache('doi')

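# Hedged sketch of how the 'doi' cache built above is typically consumed:
# the DOI string is the key, and createfunc performs the real lookup only
# on a cache miss. resolve_doi is a hypothetical fetcher, not part of the
# original class.
def cached_citation(self, doi):
    return self._cache.get_value(doi, createfunc=lambda: resolve_doi(doi))
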
def __init__(self, config=None, dbengine=None, **kwargs):
    self.config = config

    settings = dict(
        cookie_secret="12oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=",
        login_url="/login",
        template_path=os.path.join(os.path.dirname(__file__), "views"),
        static_path=os.path.join(os.path.dirname(toughwlan.__file__), "static"),
        xsrf_cookies=True,
        debug=self.config.system.debug,
        xheaders=True,
        config=self.config
    )

    self.db_engine = dbengine
    self.db = scoped_session(sessionmaker(bind=self.db_engine,
                                          autocommit=False, autoflush=False))
    self.session_manager = session.SessionManager(settings["cookie_secret"],
                                                  self.db_engine, 600)
    self.mcache = cache.CacheManager(self.db_engine)

    self.cache = CacheManager(**parse_cache_config_options({
        'cache.type': 'file',
        'cache.data_dir': '/tmp/cache/data',
        'cache.lock_dir': '/tmp/cache/lock'
    }))

    self.tp_lookup = TemplateLookup(directories=[settings['template_path']],
                                    default_filters=['decode.utf8'],
                                    input_encoding='utf-8',
                                    output_encoding='utf-8',
                                    encoding_errors='replace',
                                    module_directory="/tmp/portal")

    self.init_route()

    cyclone.web.Application.__init__(self, permit.all_handlers, **settings)

def load_config():
    """
    Load configuration
    :return:
    """
    #print(environment)
    config = ConfigParser.ConfigParser()
    here = os.path.abspath(os.path.dirname(__file__))
    config_file = os.path.join(here, '../' + environment + '.ini')
    config.read(config_file)

    # Global configuration parameters
    nltk.data.path.append(config.get('nltk', 'data_dir'))
    nlpnet.set_data_dir(config.get('nlpnet', 'data_dir'))

    # Logging
    logging.config.fileConfig(config_file)

    # Cache configuration
    cache_opts = {
        'cache.regions': config.get('lbsociam', 'cache.regions'),
        'cache.type': config.get('lbsociam', 'cache.type'),
        'cache.short_term.expire': config.get('lbsociam', 'cache.short_term.expire'),
        'cache.default_term.expire': config.get('lbsociam', 'cache.default_term.expire'),
        'cache.long_term.expire': config.get('lbsociam', 'cache.long_term.expire')
    }
    cache = CacheManager(**parse_cache_config_options(cache_opts))

    return config

def setUp(self):
    cache_managers.clear()
    cache_opts = {
        'cache.type': 'memory',
        'cache.regions': 'dummyunittest',
        'cache.expire': 10
    }
    self.cache_mgr = CacheManager(**parse_cache_config_options(cache_opts))

def setup(config):
    '''Setup module-global CacheManager'''
    global _cache_manager
    opts = parse_cache_config_options(config)
    _cache_manager = CacheManager(**opts)

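# Sketch of how the module-global manager set up above might be consumed
# elsewhere in the same module; the region name 'default' and the loader
# callable are assumptions for illustration.
def cached_lookup(key, loader):
    region = _cache_manager.get_cache('default', expire=300)
    return region.get_value(key, createfunc=loader)
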
def __init__(self, config):
    """One instance of Globals is created during application
    initialization and is available during requests via the
    'app_globals' variable
    """
    self.cache = CacheManager(**parse_cache_config_options(config))
    self.redis = redis.Redis(host='localhost', port=6379, db=0)

def setUp(self):
    CacheManager(**parse_cache_config_options({'cache.regions': 'public.shortlived',
                                               'cache.type': 'memory',
                                               'cache.public.shortlived.expire': 7200}))
    path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                        '../resources/meta/performance')
    static_path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                               '../resources/meta/static')
    component.provideUtility(PerfMetadataTemplateManager(asmt_meta_dir=path,
                                                         static_asmt_meta_dir=static_path),
                             IMetadataTemplateManager)
    static_json = os.path.join(static_path, 'ELA.static_asmt_metadata.json')
    with open(static_json) as f:
        self.__metadata = json.loads(f.read())

def __init__(self, app, config=None, environ_key='beaker.cache', **kwargs):
    """Initialize the Cache Middleware

    The Cache middleware will make a Cache instance available
    every request under the ``environ['beaker.cache']`` key by
    default. The location in environ can be changed by setting
    ``environ_key``.

    ``config``
        dict  All settings should be prefixed by 'cache.'. This
        method of passing variables is intended for Paste and other
        setups that accumulate multiple component settings in a
        single dictionary. If config contains *no cache. prefixed
        args*, then *all* of the config options will be used to
        initialize the Cache objects.

    ``environ_key``
        Location where the Cache instance will be keyed in the WSGI
        environ

    ``**kwargs``
        All keyword arguments are assumed to be cache settings and
        will override any settings found in ``config``

    """
    self.app = app
    config = config or {}

    self.options = {}

    # Update the options with the parsed config
    self.options.update(parse_cache_config_options(config))

    # Add any options from kwargs, but leave out the defaults this
    # time
    self.options.update(
        parse_cache_config_options(kwargs, include_defaults=False))

    # Assume all keys are intended for cache if none are prefixed with
    # 'cache.'
    if not self.options and config:
        self.options = config

    self.options.update(kwargs)
    self.cache_manager = CacheManager(**self.options)
    self.environ_key = environ_key

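# Illustrative wiring of the middleware above (beaker's CacheMiddleware)
# around a WSGI callable; the app body is a placeholder, but
# environ['beaker.cache'] is exactly where the __init__ above keys the
# CacheManager by default.
def simple_app(environ, start_response):
    pages = environ['beaker.cache'].get_cache('pages', expire=60)
    body = pages.get_value('home', createfunc=lambda: b'rendered page')
    start_response('200 OK', [('Content-Type', 'text/plain')])
    return [body]

wrapped_app = CacheMiddleware(simple_app, {'cache.type': 'memory'})
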
def setUp(self):
    reg = {}
    reg['cache.expire'] = 10
    reg['cache.regions'] = 'session'
    reg['cache.type'] = 'memory'
    reg['batch.user.session.timeout'] = 15
    CacheManager(**parse_cache_config_options(reg))
    component.provideUtility(SessionBackend(reg), ISessionBackend)

def setUp(self):
    reg = {}
    reg['cache.expire'] = 10
    reg['cache.regions'] = 'session'
    reg['cache.type'] = 'memory'
    reg['batch.user.session.timeout'] = 1
    self.cachemgr = CacheManager(**parse_cache_config_options(reg))
    self.backend = BeakerBackend(reg)

def setUp(self):
    cache_managers.clear()
    cache_opts = {
        'cache.type': 'memory',
        'cache.regions': 'public.data, public.filtered_data, unittest, '
                         'public.shortlived, public.very_shortlived'
    }
    self.cache_mgr = CacheManager(**parse_cache_config_options(cache_opts))
    set_tenant_map({'tomcat': 'NC', get_unittest_tenant_name(): 'NC'})

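# Hedged sketch of how code under test typically consumes regions like
# those configured above, via beaker's region decorator; the function body
# stands in for an expensive query.
from beaker.cache import cache_region

@cache_region('unittest')
def get_student_summary(student_id):
    return {'id': student_id}
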
def __init__(self, config):
    """One instance of Globals is created during application
    initialization and is available during requests via the
    'app_globals' variable
    """
    self.cache = CacheManager(**parse_cache_config_options(config))
    from turbomail.adapters import tm_pylons
    tm_pylons.start_extension(config)

def __init__(self, config):
    """One instance of Globals is created during application
    initialization and is available during requests via the
    'app_globals' variable
    """
    self.cache = CacheManager(**parse_cache_config_options(config))
    atexit.register(tm_pylons.shutdown_extension)
    interface.start(tm_pylons.FakeConfigObj(config))

def __init__(self):
    """One instance of Globals is created during application
    initialization and is available during requests via the
    'app_globals' variable
    """
    self.cache = CacheManager(**parse_cache_config_options(config))
    self.graph = rdfSubject.db = ConjunctiveGraph()
    fn1 = os.path.join(config["pylons.paths"]["data"], "capitalclass97.ttl")
    fn2 = os.path.join(config["pylons.paths"]["data"], "dump.ttl")
    self.graph.parse(fn1, format="n3")
    self.graph.parse(fn2, format="n3")

    def person_for(name):
        # Slugify the name into a local entity URI and wrap it in a Person
        slug = "-".join(unicodedata.normalize("NFKD", name)
                        .encode("ascii", "ignore").decode("ascii")
                        .split()).lower()
        uri = "".join(["<", "http://localhost:5000/entities/", slug, "#id", ">"])
        p = Person(uri)
        p.name = name
        return p

    for a in Article.ClassInstances():
        a.makers = [person_for(c) for c in a.creators]
        ies = a.ives
        irs = a.ivrs
        a.ives = [person_for(ie) for ie in ies]
        a.ivrs = [person_for(ir) for ir in irs]

    # The interviewer/interviewee literals have been replaced by Person
    # resources above, so drop the remaining literal triples
    for s, p, o in self.graph.triples((None, BIBO.interviewer, None)):
        if isinstance(o, Literal):
            self.graph.remove((s, p, o))
    for s, p, o in self.graph.triples((None, BIBO.interviewee, None)):
        if isinstance(o, Literal):
            self.graph.remove((s, p, o))

    # Write the converted graph back out; serialize() writes the file itself
    fn3 = os.path.join(config["pylons.paths"]["data"], "convert.ttl")
    self.graph.serialize(fn3)

    self.baseUrl = "http://localhost:5000/"

def __init__(self, credentialInterfaces, credentialsChecker):
    self.credentialInterfaces = credentialInterfaces
    self.checker = credentialsChecker
    # initialize cache
    cacheOptions = {
        'cache.type': 'memory',
    }
    self.cache = CacheManager(**parse_cache_config_options(cacheOptions))

def __init__(self, config):
    """One instance of Globals is created during application
    initialization and is available during requests via the
    'app_globals' variable
    """
    self.mongodb_conn = Connection(config['mongodb.url'])
    self.mongodb = self.mongodb_conn[config['mongodb.db_name']]
    self.cache = CacheManager(**parse_cache_config_options(config))

def __init__(self, config):
    """One instance of Globals is created during application
    initialization and is available during requests via the
    'app_globals' variable
    """
    self.cache = CacheManager(**parse_cache_config_options(config))
    self.dbpool = PooledDB(psycopg2, 10, database='baza1', user='******',
                           password='******', host='localhost', port=5432)

def beaker_config_update(app, d1, d2):
    '''
    Called by Config to update the Cache Configuration.
    '''
    app = bkmw.SessionMiddleware(app, d1)
    cache = bkcache.CacheManager(**bkutil.parse_cache_config_options(d2))
    logger = logging.getLogger()
    logger.debug('Beaker config updated')
    return app, cache

def __init__(self, config):
    """One instance of Globals is created during application
    initialization and is available during requests via the
    'app_globals' variable
    """
    self.DROPBOX_APP_KEY = "jtnr5188cxxn120"
    self.DROPBOX_APP_SECRET = "yz9rwnfkby86geu"
    self.cache = CacheManager(**parse_cache_config_options(config))

def setUp(self):
    cache_opts = {
        'cache.type': 'memory',
        'cache.regions': 'public.shortlived'
    }
    CacheManager(**parse_cache_config_options(cache_opts))
    self.__request = DummyRequest()
    # Must set hook_zca to false to work with unittest_with_sqlite
    reg = Registry()
    # Set up defined roles
    self.__tenant_name = get_unittest_tenant_name()
    set_tenant_map({self.__tenant_name: "NC"})
    self.__temp_dir = tempfile.mkdtemp()
    reg.settings = {}
    reg.settings['pdf.report_base_dir'] = self.__temp_dir
    self.__config = testing.setUp(registry=reg, request=self.__request,
                                  hook_zca=False)
    defined_roles = [(Allow, RolesConstants.PII, ('view', 'logout'))]
    edauth.set_roles(defined_roles)
    # Set up context security
    dummy_session = create_test_session([RolesConstants.PII])
    dummy_session.set_user_context([
        RoleRelation(RolesConstants.PII, self.__tenant_name, 'NC', '228', '242'),
        RoleRelation(RolesConstants.PII, self.__tenant_name, 'NC', '229', '939')
    ])
    self.__config.testing_securitypolicy(dummy_session.get_user())
    # celery settings for UT
    settings = {'services.celery.CELERY_ALWAYS_EAGER': True}
    self.__request.matchdict[Constants.REPORT] = 'indivStudentReport.html'
    self.__request.cookies = {'edware': '123'}
    setup_celery(settings)

import logging

from beaker.cache import CacheManager
from beaker.util import parse_cache_config_options
from config import ConfigSection
from sqlalchemy import *
from sqlalchemy.orm import create_session
from sqlalchemy.ext.declarative import declarative_base

_config = ConfigSection('staticdata')
_log = logging.getLogger('sound.srp.be.staticdata')

Base = declarative_base()
engine = create_engine(_config.get_option('connection_string'))
metadata = MetaData(bind=engine)
session = create_session(bind=engine)

cache = CacheManager(**parse_cache_config_options(
    {'cache.type': _config.get_option('cache_type')}))


class InvType(Base):
    __table__ = Table('invTypes', metadata, autoload=True)

    @staticmethod
    @cache.cache('InvType.by_id')
    def by_id(type_id):
        _log.debug('Get InvType by id: %d' % type_id)
        return session.query(InvType).filter_by(typeID=type_id).first()

    @staticmethod
    @cache.cache('InvType.by_name')
    def by_name(type_name):
        _log.debug('Get InvType by name: %s' % type_name)
        return session.query(InvType).filter_by(typeName=type_name).first()

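# Hedged usage note for the decorator-cached lookups above: repeated calls
# with the same argument are served from the beaker namespace instead of
# hitting the database, and CacheManager.invalidate clears a specific
# cached call (34 is an arbitrary example id).
inv_type = InvType.by_id(34)   # first call queries the DB, then caches
inv_type = InvType.by_id(34)   # served from the 'InvType.by_id' namespace
cache.invalidate(InvType.by_id, 'InvType.by_id', 34)
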
# Imports assumed by this snippet (the noqa markers below suggest the
# original used a star import from scapy)
import hashlib
import os

import requests
from beaker.cache import CacheManager
from beaker.util import parse_cache_config_options
from scapy.all import *  # noqa: F401,F403

IOT_AGENT_URL = os.getenv('IOT_AGENT_URL', 'http://localhost:7896')
IOT_AGENT_SERVICE_KEY = os.getenv(
    'IOT_AGENT_SERVICE_KEY', '4jggokgpepnvsb2uv4s40d59ov'
)
IOT_AGENT_DEVICE = os.getenv('IOT_AGENT_DEVICE', 'sniffer001')


def send_measure(device, source, identifier, instant):
    URL = '%s/iot/d?k=%s&i=%s' % (IOT_AGENT_URL, IOT_AGENT_SERVICE_KEY, device)
    data = 'm|%s %s %s' % (source, identifier, instant)
    return requests.post(URL, data=data)


# CACHE_OPTIONS is defined elsewhere in the original module
cache = CacheManager(**parse_cache_config_options(CACHE_OPTIONS))


def encrypt_string(hash_string):
    sha_signature = hashlib.sha256(hash_string.encode()).hexdigest()
    return sha_signature


def handle_packet(packet):
    if not packet.haslayer(Dot11ProbeReq):  # noqa: F405
        return
    if packet.type != 0 or packet.subtype != 0x04:
        return
    hashed_mac = encrypt_string(packet.addr2)

import logging

import peewee as pw
from beaker.cache import CacheManager
from beaker.util import parse_cache_config_options

from .logger import get_logger
from .settings import DB_FILEPATH, LOG_LEVEL, ROOT_DIR

logger = get_logger(__name__)

# peewee logging
logger_pw = get_logger('peewee')
logger_pw.addHandler(logging.StreamHandler())
logger_pw.setLevel(LOG_LEVEL)

db = pw.SqliteDatabase(DB_FILEPATH)

# beaker caching via decorator API
cache = CacheManager(**parse_cache_config_options({
    # 'cache.type': 'file',
    'cache.type': 'memory',
    'cache.data_dir': '.beaker_cache/data',
    'cache.lock_dir': '.beaker_cache/lock',
}))


# -- Models

class BaseModel(pw.Model):
    class Meta:
        database = db


class File(BaseModel):
    name = pw.CharField(unique=True)
    # Cached path for faster loads
    path = pw.CharField(null=True)

def __init__(self):
    self.cache = CacheManager(
        **parse_cache_config_options(self.__cache_opts))
    self._cache_map = {}

class Resources:

    BOOK_URL = "https://www.packtpub.com/packt/offers/free-learning"
    HEADERS = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/51.0.2704.79 Safari/537.36"
    }

    # Configuring cache
    cache = CacheManager(
        **parse_cache_config_options({'cache.type': 'memory'}))

    def __init__(self, config):
        self.config = config
        # create delegate method based on choice
        if config['events_source'] == 'meetup':
            self.generate_events = self.meetup_events
        else:
            self.generate_events = self.facebook_events

    @cache.cache('get_events', expire=60)
    def get_events(self, list_size=5):
        return list(self.generate_events(list_size))

    def meetup_events(self, n):
        """Fetch events from Meetup."""
        # api v3 base url
        all_events = []
        for group in self.config['group_name']:
            url = "https://api.meetup.com/{group}/events".format(group=group)
            # response for the events
            r = requests.get(
                url,
                params={
                    'key': self.config['meetup_key'],
                    'status': 'upcoming',
                    'only': 'name,time,link',  # filter response to these fields
                    'page': n,  # limit to n events
                })
            # API output
            events = r.json()

            for event in events:
                # convert time returned by Meetup API
                event['time'] = datetime.datetime.fromtimestamp(
                    event['time'] / 1000, tz=util.AJU_TZ)
                # shorten url!
                event['link'] = self.get_short_url(event['link'])

            all_events.extend(events)
        return sorted(all_events, key=lambda x: x['time'])

    def facebook_events(self, n):
        """Fetch events from Facebook."""
        all_events = []
        for group in self.config['group_name']:
            # api v2.8 base url
            url = "https://graph.facebook.com/v2.8/%s/events" % group
            # response for the events
            r = requests.get(
                url,
                params={
                    'access_token': self.config['facebook_key'],
                    'since': 'today',
                    'fields': 'name,start_time',  # filter response to these fields
                    'limit': n,  # limit to n events
                })
            # API output
            events = r.json().get('data', [])

            for event in events:
                # convert time returned by Facebook API
                event['time'] = datetime.datetime.strptime(
                    event.pop('start_time'), "%Y-%m-%dT%H:%M:%S%z")
                # create event link
                link = "https://www.facebook.com/events/%s" % event.pop('id')
                # shorten url!
                event['link'] = self.get_short_url(link)

            all_events.extend(events)
        return sorted(all_events, key=lambda x: x['time'])

    @cache.cache('get_packt_free_book', expire=600)
    def get_packt_free_book(self):
        r = requests.get(self.BOOK_URL, headers=self.HEADERS)
        return self.extract_packt_free_book(r.content, r.encoding)

    @staticmethod
    def extract_packt_free_book(content, encoding='utf-8'):
        if hasattr(content, 'read'):  # file-type
            content = content.read()
        if isinstance(content, bytes):  # convert to str
            content = content.decode(encoding)

        # Extracting information with html parser
        page = BeautifulSoup(content, 'html.parser')
        dealoftheday = page.select_one(
            '#deal-of-the-day div div div:nth-of-type(2)')

        if not dealoftheday:
            return None

        book = util.AttributeDict()
        book['name'] = dealoftheday.select_one(
            'div:nth-of-type(2) h2').text.strip()
        book['summary'] = dealoftheday.select_one(
            'div:nth-of-type(3)').text.strip()
        book['expires'] = int(
            dealoftheday.select_one(
                'span.packt-js-countdown').attrs['data-countdown-to'])

        return book

    @cache.cache('get_social_links', expire=3600)
    def get_social_links(self):
        remote_url = self.config['remote_resources_url']
        if remote_url:
            url = remote_url + '/social_links.json'
            try:
                r = requests.get(url)
                if r.ok:
                    return OrderedDict(r.json())
            except requests.exceptions.RequestException:
                pass
            except Exception as e:
                logging.exception(e)
        return None

    @cache.cache('get_short_url')
    def get_short_url(self, long_url):
        # Only request a short URL if an API key is configured
        if self.config['url_shortener_key']:
            r = requests.post("https://www.googleapis.com/urlshortener/v1/url",
                              params={
                                  'key': self.config['url_shortener_key'],
                                  'fields': 'id'
                              },
                              json={'longUrl': long_url})
            if r.status_code == 200:
                return r.json()['id']
            else:
                logging.exception(r.text)
        # If anything went wrong, fall back to the long URL itself
        return long_url

def make_cache_obj(**kwargs):
    opts = defaults.copy()
    opts.update(kwargs)
    cache = CacheManager(**util.parse_cache_config_options(opts))
    return cache

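# Assumed usage of the factory above ('defaults' is defined elsewhere in
# the original module, so only overrides are shown). Dotted option names
# are not valid Python identifiers, so they are passed by unpacking a dict.
cache = make_cache_obj(**{'cache.type': 'memory', 'cache.expire': 120})
lookups = cache.get_cache('lookups')
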
# python beaker caching
from beaker.cache import CacheManager
from beaker.util import parse_cache_config_options

cache = CacheManager(**parse_cache_config_options({
    'cache.type': 'memory',
    'cache.expire': 60
}))

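# Minimal sketch of the decorator API enabled by the manager above; the
# memoized function is illustrative. Calls with the same arguments are
# served from the memory backend for the configured 60 seconds.
@cache.cache('fib')
def fib(n):
    return n if n < 2 else fib(n - 1) + fib(n - 2)

assert fib(20) == 6765
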
def __init__(self, config_file='augur.config.json', no_config_file=0,
             description='Augur application'):
    # Command line arguments
    # TODO: make this useful
    self.arg_parser = argparse.ArgumentParser(description=description)
    self.arg_parser.parse_known_args()

    # Open the config file
    self.__already_exported = {}
    self.__default_config = {'Plugins': []}
    self.__using_config_file = True
    self.__config_bad = False
    self.__config_file_path = os.path.abspath(
        os.getenv('AUGUR_CONFIG_FILE', config_file))
    self.__config_location = os.path.dirname(self.__config_file_path)
    self.__export_env = os.getenv('AUGUR_ENV_EXPORT', '0') == '1'

    if os.getenv('AUGUR_ENV_ONLY', '0') != '1' and no_config_file == 0:
        try:
            self.__config_file = open(self.__config_file_path, 'r+')
        except:
            logger.info(
                'Couldn\'t open {}, attempting to create. If you have a '
                'augur.cfg, you can convert it to a json file using '
                '"make to-json"'.format(config_file))
            if not os.path.exists(self.__config_location):
                os.makedirs(self.__config_location)
            self.__config_file = open(self.__config_file_path, 'w+')
            self.__config_bad = True

        # Options to export the loaded configuration as environment
        # variables for Docker
        if self.__export_env:
            export_filename = os.getenv('AUGUR_ENV_EXPORT_FILE', 'augur.cfg.sh')
            self.__export_file = open(export_filename, 'w+')
            logger.info(
                'Exporting {} to environment variable export statements '
                'in {}'.format(config_file, export_filename))
            self.__export_file.write('#!/bin/bash\n')

        # Load the config file
        try:
            config_text = self.__config_file.read()
            config_text = config_text.replace('$(AUGUR)', self.__config_location)
            self.__config = json.loads(config_text)
        except json.decoder.JSONDecodeError as e:
            if not self.__config_bad:
                self.__using_config_file = False
                logger.error(
                    '%s could not be parsed, using defaults. Fix that file, '
                    'or delete it and run this again to regenerate it. '
                    'Error: %s', self.__config_file_path, str(e))
            self.__config = self.__default_config
    else:
        self.__using_config_file = False
        self.__config = self.__default_config

    # List of data sources that can do periodic updates
    self.__updatable = []
    self.__processes = []

    # Create cache
    cache_config = self.read_config('Cache', 'config', None, {
        'cache.type': 'file',
        'cache.data_dir': 'runtime/cache/',
        'cache.lock_dir': 'runtime/cache/'
    })
    if not os.path.exists(cache_config['cache.data_dir']):
        os.makedirs(cache_config['cache.data_dir'])
    if not os.path.exists(cache_config['cache.lock_dir']):
        os.makedirs(cache_config['cache.lock_dir'])
    cache_parsed = parse_cache_config_options(cache_config)
    self.cache = CacheManager(**cache_parsed)

    # Initialize all objects to None
    self.__ghtorrent = None
    self.__ghtorrentplus = None
    self.__githubapi = None
    self.__git = None
    self.__librariesio = None
    self.__downloads = None
    self.__publicwww = None
    self.__localCSV = None

def __init__(self):
    self.logger = logging.getLogger('radiovisserver.radiodns')
    self.cache = CacheManager(**parse_cache_config_options(
        config.CACHE_OPTS)).get_cache('radiodns', expire=60)

class AtlasProxy(BaseProxy):
    """
    Atlas Proxy client for the amundsen metadata
    {ATLAS_API_DOCS} = https://atlas.apache.org/api/v2/
    """
    TABLE_ENTITY = app.config['ATLAS_TABLE_ENTITY']
    DB_ATTRIBUTE = app.config['ATLAS_DB_ATTRIBUTE']
    STATISTICS_FORMAT_SPEC = app.config['STATISTICS_FORMAT_SPEC']
    BOOKMARK_TYPE = 'Bookmark'
    USER_TYPE = 'User'
    READER_TYPE = 'Reader'
    QN_KEY = 'qualifiedName'
    BOOKMARK_ACTIVE_KEY = 'active'
    GUID_KEY = 'guid'
    ATTRS_KEY = 'attributes'
    REL_ATTRS_KEY = 'relationshipAttributes'
    ENTITY_URI_KEY = 'entityUri'

    _CACHE = CacheManager(**parse_cache_config_options({
        'cache.regions': 'atlas_proxy',
        'cache.atlas_proxy.type': 'memory',
        'cache.atlas_proxy.expire': _ATLAS_PROXY_CACHE_EXPIRY_SEC
    }))

    def __init__(self, *,
                 host: str,
                 port: int,
                 user: str = 'admin',
                 password: str = '',
                 encrypted: bool = False,
                 validate_ssl: bool = False) -> None:
        """
        Initiate the Apache Atlas client with the provided credentials
        """
        protocol = 'https' if encrypted else 'http'
        self._driver = Atlas(host=host, port=port,
                             username=user, password=password,
                             protocol=protocol, validate_ssl=validate_ssl)

    def _get_ids_from_basic_search(self, *, params: Dict) -> List[str]:
        """
        FixMe (Verdan): UNUSED. Please remove after implementing atlas proxy
        Search for the entities based on the params provided as argument.
        :param params: the dictionary of parameters to be used for the basic search
        :return: The flat list of GUIDs of entities found based on the params.
        """
        ids = list()
        search_results = self._driver.search_basic(**params)
        for result in search_results:
            for entity in result.entities:
                ids.append(entity.guid)
        return ids

    def _get_flat_values_from_dsl(self, dsl_param: dict) -> List:
        """
        Makes a DSL query asking for a specific attribute, extracts that
        attribute from the result (which is a list of lists), and converts
        it into a flat list.
        :param dsl_param: A DSL parameter, with SELECT clause
        :return: A flat list of the attributes specified in the SELECT clause
        """
        attributes: List = list()
        _search_collection = self._driver.search_dsl(**dsl_param)
        for collection in _search_collection:
            attributes = collection.flatten_attrs()
        return attributes

    def _extract_info_from_uri(self, *, table_uri: str) -> Dict:
        """
        Extracts the table information from table_uri coming from frontend.
        :param table_uri:
        :return: Dictionary object, containing following information:
        entity: Type of entity example: rdbms_table, hive_table etc.
        cluster: Cluster information
        db: Database Name
        name: Table Name
        """
        pattern = re.compile(
            r"""
            ^   (?P<entity>.*?)
            :\/\/
                (?P<cluster>.*)
            \.
                (?P<db>.*?)
            \/
                (?P<name>.*?)
            $
            """, re.X)
        result = pattern.match(table_uri)
        return result.groupdict() if result else dict()

    def _parse_reader_qn(self, reader_qn: str) -> Dict:
        """
        Parse reader qualifiedName and extract the info
        :param reader_qn:
        :return: Dictionary object containing following information:
        cluster: cluster information
        db: Database name
        name: Table name
        """
        pattern = re.compile(
            r"""
            ^(?P<db>[^.]*)
            \.
            (?P<table>[^.]*)
            \.
            (?P<user_id>[^.]*)\.reader
            \@
            (?P<cluster>.*)
            $
            """, re.X)
        result = pattern.match(reader_qn)
        return result.groupdict() if result else dict()

    def _parse_bookmark_qn(self, bookmark_qn: str) -> Dict:
        """
        Parse bookmark qualifiedName and extract the info
        :param bookmark_qn: Qualified Name of Bookmark entity
        :return: Dictionary object containing following information:
        cluster: cluster information
        db: Database name
        name: Table name
        """
        pattern = re.compile(
            r"""
            ^(?P<db>[^.]*)
            \.
            (?P<table>[^.]*)
            \.
            (?P<entity_type>[^.]*)
            \.
            (?P<user_id>[^.]*)\.bookmark
            \@
            (?P<cluster>.*)
            $
            """, re.X)
        result = pattern.match(bookmark_qn)
        return result.groupdict() if result else dict()

    def _get_user_details(self, user_id: str) -> Dict:
        """
        Helper function to get the user details if the `USER_DETAIL_METHOD`
        is configured; otherwise uses the user_id for both the email and
        user_id properties.
        :param user_id: The unique user id of a user entity
        :return: a dictionary of user details
        """
        if app.config.get('USER_DETAIL_METHOD'):
            user_details = app.config.get('USER_DETAIL_METHOD')(user_id)  # type: ignore
        else:
            user_details = {'email': user_id, 'user_id': user_id}

        return user_details

    def _get_table_entity(self, *, table_uri: str) -> EntityUniqueAttribute:
        """
        Fetch information from table_uri and then find the appropriate entity.
        The reason we're not returning entity_unique_attribute().entity
        directly is that entity_unique_attribute() returns an entity object
        that can be used for update purposes, while
        entity_unique_attribute().entity only returns the dictionary
        :param table_uri:
        :return: A tuple of Table entity and parsed information of table qualified name
        """
        table_info = self._extract_info_from_uri(table_uri=table_uri)
        table_qn = make_table_qualified_name(table_info.get('name'),
                                             table_info.get('cluster'),
                                             table_info.get('db'))
        try:
            return self._driver.entity_unique_attribute(table_info['entity'],
                                                        qualifiedName=table_qn)
        except Exception as ex:
            LOGGER.exception(f'Table not found. {str(ex)}')
            raise NotFoundException(
                'Table URI( {table_uri} ) does not exist'.format(table_uri=table_uri))

    def _get_user_entity(self, user_id: str) -> EntityUniqueAttribute:
        """
        Fetches a user entity from an id
        :param user_id:
        :return:
        """
        try:
            return self._driver.entity_unique_attribute("User",
                                                        qualifiedName=user_id)
        except Exception as ex:
            raise NotFoundException(
                '(User {user_id}) does not exist'.format(user_id=user_id))

    def _create_bookmark(self, entity: EntityUniqueAttribute, user_guid: str,
                         bookmark_qn: str, table_uri: str) -> None:
        """
        Creates a bookmark entity for a specific user and table uri.
        :param user_guid: User's guid
        :param bookmark_qn: Bookmark qualifiedName
        :return:
        """
        bookmark_entity = {
            'entity': {
                'typeName': self.BOOKMARK_TYPE,
                'attributes': {
                    'qualifiedName': bookmark_qn,
                    self.BOOKMARK_ACTIVE_KEY: True,
                    'entityUri': table_uri,
                    'user': {'guid': user_guid},
                    'entity': {'guid': entity.entity[self.GUID_KEY]}
                }
            }
        }
        self._driver.entity_post.create(data=bookmark_entity)

    def _get_bookmark_entity(self, entity_uri: str,
                             user_id: str) -> EntityUniqueAttribute:
        """
        Fetch a Bookmark entity from parsing table uri and user id.
        If the Bookmark is not present, create one for the user.
        :param table_uri:
        :param user_id: Qualified Name of a user
        :return:
        """
        table_info = self._extract_info_from_uri(table_uri=entity_uri)
        bookmark_qn = '{}.{}.{}.{}.bookmark@{}'.format(table_info.get('db'),
                                                       table_info.get('name'),
                                                       table_info.get('entity'),
                                                       user_id,
                                                       table_info.get('cluster'))
        try:
            bookmark_entity = self._driver.entity_unique_attribute(
                self.BOOKMARK_TYPE, qualifiedName=bookmark_qn)

            if not bookmark_entity.entity:
                table_entity = self._get_table_entity(table_uri=entity_uri)
                # Fetch user entity from user_id for relation
                user_entity = self._get_user_entity(user_id)
                # Create bookmark entity with the user relation.
                self._create_bookmark(table_entity,
                                      user_entity.entity[self.GUID_KEY],
                                      bookmark_qn, entity_uri)
                # Fetch bookmark entity after creating it.
                bookmark_entity = self._driver.entity_unique_attribute(
                    self.BOOKMARK_TYPE, qualifiedName=bookmark_qn)

            return bookmark_entity
        except Exception as ex:
            LOGGER.exception(f'Bookmark not found. {str(ex)}')
            raise NotFoundException(
                'Bookmark( {bookmark_qn} ) does not exist'.format(
                    bookmark_qn=bookmark_qn))

    def _get_column(self, *, table_uri: str, column_name: str) -> Dict:
        """
        Fetch the column information from referredEntities of the table entity
        :param table_uri:
        :param column_name:
        :return: A dictionary containing the column details
        """
        try:
            table_entity = self._get_table_entity(table_uri=table_uri)
            columns = table_entity.entity[self.REL_ATTRS_KEY].get('columns')
            for column in columns or list():
                col_details = table_entity.referredEntities[column[self.GUID_KEY]]
                if column_name == col_details[self.ATTRS_KEY]['name']:
                    return col_details
            raise NotFoundException(f'Column not found: {column_name}')
        except KeyError as ex:
            LOGGER.exception(f'Column not found: {str(ex)}')
            raise NotFoundException(f'Column not found: {column_name}')

    def _serialize_columns(self, *, entity: EntityUniqueAttribute) -> \
            Union[List[Column], List]:
        """
        Helper function to fetch the columns from entity and serialize them
        using Column and Stat model.
        :param entity: EntityUniqueAttribute object, along with relationshipAttributes
        :return: A list of Column objects, if there are any columns available,
        else an empty list.
        """
        columns = list()
        for column in entity.entity[self.REL_ATTRS_KEY].get('columns') or list():
            column_status = column.get('entityStatus', 'inactive').lower()

            if column_status != 'active':
                continue

            col_entity = entity.referredEntities[column[self.GUID_KEY]]
            col_attrs = col_entity[self.ATTRS_KEY]
            statistics = list()

            for stats in col_attrs.get('statistics') or list():
                stats_attrs = stats['attributes']

                stat_type = stats_attrs.get('stat_name')
                stat_format = self.STATISTICS_FORMAT_SPEC.get(stat_type, dict())

                if not stat_format.get('drop', False):
                    stat_type = stat_format.get('new_name', stat_type)

                    stat_val = stats_attrs.get('stat_val')
                    format_val = stat_format.get('format')

                    if format_val:
                        stat_val = format_val.format(stat_val)
                    else:
                        stat_val = str(stat_val)

                    start_epoch = stats_attrs.get('start_epoch')
                    end_epoch = stats_attrs.get('end_epoch')

                    statistics.append(
                        Stat(
                            stat_type=stat_type,
                            stat_val=stat_val,
                            start_epoch=start_epoch,
                            end_epoch=end_epoch,
                        ))

            columns.append(
                Column(
                    name=col_attrs.get('name'),
                    description=col_attrs.get('description') or col_attrs.get('comment'),
                    col_type=col_attrs.get('type') or col_attrs.get('dataType')
                    or col_attrs.get('data_type'),
                    sort_order=col_attrs.get('position') or 9999,
                    stats=statistics,
                ))
        return sorted(columns, key=lambda item: item.sort_order)

    def _get_reports(self, guids: List[str]) -> List[ResourceReport]:
        reports = []
        if guids:
            report_entities_collection = self._driver.entity_bulk(guid=guids)
            for report_entity in extract_entities(report_entities_collection):
                try:
                    if report_entity.status == Status.ACTIVE:
                        report_attrs = report_entity.attributes
                        reports.append(
                            ResourceReport(name=report_attrs['name'],
                                           url=report_attrs['url']))
                except (KeyError, AttributeError) as ex:
                    LOGGER.exception(
                        'Error while accessing table report: {}. {}'.format(
                            str(report_entity), str(ex)))

        parsed_reports = app.config['RESOURCE_REPORT_CLIENT'](reports) \
            if app.config['RESOURCE_REPORT_CLIENT'] else reports

        return parsed_reports

    def _get_owners(self, data_owners: list,
                    fallback_owner: str = None) -> List[User]:
        owners_detail = list()
        active_owners_list = list()

        active_owners = filter(lambda item:
                               item['entityStatus'] == Status.ACTIVE and
                               item['relationshipStatus'] == Status.ACTIVE,
                               data_owners)

        for owner in active_owners:
            owner_qn = owner['displayText']
            owner_data = self._get_user_details(owner_qn)
            owners_detail.append(User(**owner_data))
            active_owners_list.append(owner_qn)

        # To avoid duplication, only add the fallback owner
        # if it is not already in data_owners
        if fallback_owner and (fallback_owner not in active_owners_list):
            owners_detail.append(User(**self._get_user_details(fallback_owner)))

        return owners_detail

    def get_user(self, *, id: str) -> Union[UserEntity, None]:
        pass

    def get_users(self) -> List[UserEntity]:
        pass

    def get_table(self, *, table_uri: str) -> Table:
        """
        Gathers all the information needed for the Table Detail Page.
        :param table_uri:
        :return: A Table object with all the information available
        or gathered from different entities.
        """
        entity = self._get_table_entity(table_uri=table_uri)
        table_details = entity.entity

        try:
            attrs = table_details[self.ATTRS_KEY]

            programmatic_descriptions = self._get_programmatic_descriptions(
                attrs.get('parameters', dict()))

            table_qn = parse_table_qualified_name(
                qualified_name=attrs.get(self.QN_KEY))

            tags = []
            # Using `or` in case the key 'classifications' is present with a None
            for classification in table_details.get('classifications') or list():
                tags.append(
                    Tag(tag_name=classification.get('typeName'),
                        tag_type="default"))

            columns = self._serialize_columns(entity=entity)

            reports_guids = [report.get("guid")
                             for report in attrs.get("reports") or list()]

            table_type = attrs.get('tableType') or 'table'
            is_view = 'view' in table_type.lower()

            table = Table(
                database=table_details.get('typeName'),
                cluster=table_qn.get('cluster_name', ''),
                schema=table_qn.get('db_name', ''),
                name=attrs.get('name') or table_qn.get("table_name", ''),
                tags=tags,
                description=attrs.get('description') or attrs.get('comment'),
                owners=self._get_owners(
                    table_details[self.REL_ATTRS_KEY].get('ownedBy', []),
                    attrs.get('owner')),
                resource_reports=self._get_reports(guids=reports_guids),
                columns=columns,
                is_view=is_view,
                table_readers=self._get_readers(attrs.get(self.QN_KEY)),
                last_updated_timestamp=self._parse_date(
                    table_details.get('updateTime')),
                programmatic_descriptions=programmatic_descriptions,
                watermarks=self._get_table_watermarks(table_details))

            return table
        except KeyError as ex:
            LOGGER.exception(
                'Error while accessing table information. {}'.format(str(ex)))
            raise BadRequest(
                'Some of the required attributes '
                'are missing in : ( {table_uri} )'.format(table_uri=table_uri))

    @staticmethod
    def _validate_date(text_date: str, date_format: str) \
            -> Tuple[Optional[datetime.datetime], Optional[str]]:
        try:
            return datetime.datetime.strptime(text_date, date_format), date_format
        except (ValueError, TypeError):
            return None, None

    @staticmethod
    def _select_watermark_format(partition_names: List[str]) -> Optional[str]:
        result = None

        for partition_name in partition_names:
            # Assume that all partitions for a given table have the same date
            # format. Only thing that needs to be done is establishing which
            # format out of the supported ones it is, and then we validate
            # every partition against it.
            for df in app.config['WATERMARK_DATE_FORMATS']:
                _, result = AtlasProxy._validate_date(partition_name, df)

                if result:
                    LOGGER.debug('Established date format',
                                 extra=dict(date_format=result))
                    return result

        return result

    @staticmethod
    def _render_partition_key_name(entity: EntityUniqueAttribute) -> Optional[str]:
        _partition_keys = []

        for partition_key in entity.get('attributes', dict()).get('partitionKeys', []):
            partition_key_column_name = partition_key.get('displayName')

            if partition_key_column_name:
                _partition_keys.append(partition_key_column_name)

        partition_key = ' '.join(_partition_keys).strip()

        return partition_key

    def _get_table_watermarks(self, entity: EntityUniqueAttribute) -> List[Watermark]:
        partition_value_format = '%Y-%m-%d %H:%M:%S'

        _partitions = entity.get('relationshipAttributes', dict()).get('partitions', list())

        names = [_partition.get('displayText') for _partition in _partitions
                 if _partition.get('entityStatus') == Status.ACTIVE
                 and _partition.get('relationshipStatus') == Status.ACTIVE]

        if not names:
            return []

        partition_key = AtlasProxy._render_partition_key_name(entity)
        watermark_date_format = AtlasProxy._select_watermark_format(names)

        partitions = {}

        for _partition in _partitions:
            partition_name = _partition.get('displayText')

            if partition_name and watermark_date_format:
                partition_date, _ = AtlasProxy._validate_date(
                    partition_name, watermark_date_format)

                if partition_date:
                    common_values = {
                        'partition_value': datetime.datetime.strftime(
                            partition_date, partition_value_format),
                        'create_time': 0,
                        'partition_key': partition_key
                    }

                    partitions[partition_date] = common_values

        if partitions:
            low_watermark_date = min(partitions.keys())
            high_watermark_date = max(partitions.keys())

            low_watermark = Watermark(watermark_type='low_watermark',
                                      **partitions.get(low_watermark_date))
            high_watermark = Watermark(watermark_type='high_watermark',
                                       **partitions.get(high_watermark_date))

            return [low_watermark, high_watermark]
        else:
            return []

    def delete_owner(self, *, table_uri: str, owner: str) -> None:
        """
        :param table_uri:
        :param owner:
        :return:
        """
        table = self._get_table_entity(table_uri=table_uri)
        table_entity = table.entity

        if table_entity[self.REL_ATTRS_KEY].get("ownedBy"):
            try:
                active_owners = [
                    item for item in table_entity[self.REL_ATTRS_KEY]['ownedBy']
                    if item['relationshipStatus'] == Status.ACTIVE
                    and item['displayText'] == owner
                ]
                if active_owners:
                    self._driver.relationship_guid(
                        active_owners[0].get('relationshipGuid')).delete()
                else:
                    raise BadRequest('You can not delete this owner.')
            except NotFound as ex:
                LOGGER.exception(
                    'Error while removing table data owner. {}'.format(str(ex)))

    def add_owner(self, *, table_uri: str, owner: str) -> None:
        """
        Query on the Atlas User entity to find if an entity exists for the
        owner string in parameter; if not, create one. Then use that User
        entity's GUID to add a relationship between Table and User on the
        ownedBy field.
        :param table_uri:
        :param owner: Email address of the owner
        :return: None, as it simply adds the owner.
""" owner_info = self._get_user_details(owner) if not owner_info: raise NotFoundException(f'User "{owner}" does not exist.') user_dict = { "entity": { "typeName": "User", "attributes": { "qualifiedName": owner }, } } # Get or Create a User user_entity = self._driver.entity_post.create(data=user_dict) user_guid = next(iter(user_entity.get("guidAssignments").values())) table = self._get_table_entity(table_uri=table_uri) entity_def = { "typeName": "DataSet_Users_Owner", "end1": { "guid": table.entity.get("guid"), "typeName": "Table", }, "end2": { "guid": user_guid, "typeName": "User", }, } try: self._driver.relationship.create(data=entity_def) except Conflict as ex: LOGGER.exception( 'Error while adding the owner information. {}'.format(str(ex))) raise BadRequest( f'User {owner} is already added as a data owner for ' f'table {table_uri}.') def get_table_description(self, *, table_uri: str) -> Union[str, None]: """ :param table_uri: :return: The description of the table as a string """ entity = self._get_table_entity(table_uri=table_uri) return entity.entity[self.ATTRS_KEY].get('description') def put_table_description(self, *, table_uri: str, description: str) -> None: """ Update the description of the given table. :param table_uri: :param description: Description string :return: None """ entity = self._get_table_entity(table_uri=table_uri) entity.entity[self.ATTRS_KEY]['description'] = description entity.update() def add_tag(self, *, id: str, tag: str, tag_type: str, resource_type: ResourceType = ResourceType.Table) -> None: """ Assign the tag/classification to the give table API Ref: /resource_EntityREST.html#resource_EntityREST_addClassification_POST :param table_uri: :param tag: Tag/Classification Name :param tag_type :return: None """ entity = self._get_table_entity(table_uri=id) entity_bulk_tag = { "classification": { "typeName": tag }, "entityGuids": [entity.entity[self.GUID_KEY]] } self._driver.entity_bulk_classification.create(data=entity_bulk_tag) def add_badge(self, *, id: str, badge_name: str, category: str = '', resource_type: ResourceType) -> None: # Not implemented raise NotImplementedError def delete_tag(self, *, id: str, tag: str, tag_type: str, resource_type: ResourceType = ResourceType.Table) -> None: """ Delete the assigned classfication/tag from the given table API Ref: /resource_EntityREST.html#resource_EntityREST_deleteClassification_DELETE :param table_uri: :param tag: :return: """ try: entity = self._get_table_entity(table_uri=id) guid_entity = self._driver.entity_guid( entity.entity[self.GUID_KEY]) guid_entity.classifications(tag).delete() except Exception as ex: # FixMe (Verdan): Too broad exception. Please make it specific LOGGER.exception('For some reason this deletes the classification ' 'but also always return exception. 
                '{}'.format(str(ex)))

    def delete_badge(self, *, id: str, badge_name: str, category: str,
                     resource_type: ResourceType) -> None:
        # Not implemented
        raise NotImplementedError

    def put_column_description(self, *, table_uri: str, column_name: str,
                               description: str) -> None:
        """
        :param table_uri:
        :param column_name: Name of the column to update the description
        :param description: The description string
        :return: None, as it simply updates the description of a column
        """
        column_detail = self._get_column(table_uri=table_uri,
                                         column_name=column_name)
        col_guid = column_detail[self.GUID_KEY]

        entity = self._driver.entity_guid(col_guid)
        entity.entity[self.ATTRS_KEY]['description'] = description
        entity.update(attribute='description')

    def get_column_description(self, *, table_uri: str,
                               column_name: str) -> Union[str, None]:
        """
        :param table_uri:
        :param column_name:
        :return: The column description using the referredEntities
        information of a table entity
        """
        column_detail = self._get_column(table_uri=table_uri,
                                         column_name=column_name)
        return column_detail[self.ATTRS_KEY].get('description')

    def _serialize_popular_tables(self, entities: list) -> List[PopularTable]:
        """
        Takes a list of entities and serializes the popular tables.
        :param entities: List of entities from atlas client
        :return: a list of PopularTable objects
        """
        popular_tables = list()

        for table in entities:
            table_attrs = table.attributes

            table_qn = parse_table_qualified_name(
                qualified_name=table_attrs.get(self.QN_KEY))

            table_name = table_qn.get("table_name") or table_attrs.get('name')
            db_name = table_qn.get("db_name", '')
            db_cluster = table_qn.get("cluster_name", '')

            popular_table = PopularTable(
                database=table.typeName,
                cluster=db_cluster,
                schema=db_name,
                name=table_name,
                description=table_attrs.get('description') or
                table_attrs.get('comment'))
            popular_tables.append(popular_table)

        return popular_tables

    def get_popular_tables(self, *, num_entries: int) -> List[PopularTable]:
        """
        Generates a list of Popular tables to be shown on the home page of Amundsen.
        :param num_entries: Number of popular tables to fetch
        :return: A List of popular table instances
        """
        popular_query_params = {'typeName': 'Table',
                                'sortBy': 'popularityScore',
                                'sortOrder': 'DESCENDING',
                                'excludeDeletedEntities': True,
                                'limit': num_entries}
        search_results = self._driver.search_basic.create(data=popular_query_params)
        return self._serialize_popular_tables(search_results.entities)

    def get_latest_updated_ts(self) -> int:
        date = None

        for metrics in self._driver.admin_metrics:
            try:
                date = self._parse_date(
                    metrics.general.get('stats', {}).get(
                        'Notification:lastMessageProcessedTime'))
            except AttributeError:
                pass

        date = date or 0
        return date

    def get_tags(self) -> List:
        """
        Fetch all the classification entity definitions from atlas, as these
        are used to generate the autocomplete on the table detail page
        :return: A list of TagDetail objects
        """
        tags = []
        for metrics in self._driver.admin_metrics:
            tag_stats = metrics.tag
            for tag, count in tag_stats["tagEntities"].items():
                tags.append(TagDetail(tag_name=tag, tag_count=count))
        return tags

    def get_badges(self) -> List:
        # Not implemented
        return []

    def _get_resources_followed_by_user(self, user_id: str, resource_type: str) \
            -> List[Union[PopularTable, DashboardSummary]]:
        """
        ToDo (Verdan): Dashboard still needs to be implemented.
        Helper function to get the resources (table, dashboard etc.)
        followed by a user.
        :param user_id: User ID of a user
        :param resource_type: Type of resource to return, could be table,
        dashboard etc.
        :return: A list of PopularTable, DashboardSummary or any other resource.
        """
        params = {
            'typeName': self.BOOKMARK_TYPE,
            'offset': '0',
            'limit': '1000',
            'excludeDeletedEntities': True,
            'entityFilters': {
                'condition': 'AND',
                'criterion': [
                    {
                        'attributeName': self.QN_KEY,
                        'operator': 'contains',
                        'attributeValue': f'.{user_id}.bookmark'
                    },
                    {
                        'attributeName': self.BOOKMARK_ACTIVE_KEY,
                        'operator': 'eq',
                        'attributeValue': 'true'
                    }
                ]
            },
            'attributes': ['count', self.QN_KEY, self.ENTITY_URI_KEY]
        }
        # Fetches the bookmark entities based on filters
        search_results = self._driver.search_basic.create(data=params)

        resources = []
        for record in search_results.entities:
            table_info = self._extract_info_from_uri(
                table_uri=record.attributes[self.ENTITY_URI_KEY])
            res = self._parse_bookmark_qn(record.attributes[self.QN_KEY])
            resources.append(PopularTable(
                database=table_info['entity'],
                cluster=res['cluster'],
                schema=res['db'],
                name=res['table']))
        return resources

    def _get_resources_owned_by_user(self, user_id: str, resource_type: str) \
            -> List[Union[PopularTable, DashboardSummary, Any]]:
        """
        ToDo (Verdan): Dashboard still needs to be implemented.
        Helper function to get the resources (table, dashboard etc.)
        owned by a user.
        :param user_id: User ID of a user
        :param resource_type: Type of resource to return, could be table,
        dashboard etc.
        :return: A list of PopularTable, DashboardSummary or any other resource.
        """
        resources = list()

        if resource_type == ResourceType.Table.name:
            type_regex = "(.*)_table$"
        # elif resource_type == ResourceType.Dashboard.name:
        #     type_regex = "Dashboard"
        else:
            LOGGER.exception(f'Resource Type ({resource_type}) is not yet implemented')
            raise NotImplementedError

        user_entity = self._driver.entity_unique_attribute(
            self.USER_TYPE, qualifiedName=user_id).entity

        if not user_entity:
            LOGGER.exception(f'User ({user_id}) not found in Atlas')
            raise NotFoundException(f'User {user_id} not found.')

        resource_guids = set()
        for item in user_entity[self.REL_ATTRS_KEY].get('owns') or list():
            if (item['entityStatus'] == Status.ACTIVE and
                    item['relationshipStatus'] == Status.ACTIVE and
                    re.compile(type_regex).match(item['typeName'])):
                resource_guids.add(item[self.GUID_KEY])

        params = {
            'typeName': self.TABLE_ENTITY,
            'excludeDeletedEntities': True,
            'entityFilters': {
                'condition': 'AND',
                'criterion': [
                    {
                        'attributeName': 'owner',
                        'operator': 'startsWith',
                        'attributeValue': user_id.lower()
                    }
                ]
            },
            'attributes': [self.GUID_KEY]
        }
        table_entities = self._driver.search_basic.create(data=params)
        for table in table_entities.entities:
            resource_guids.add(table.guid)

        if resource_guids:
            entities = extract_entities(
                self._driver.entity_bulk(guid=list(resource_guids),
                                         ignoreRelationships=True))
            if resource_type == ResourceType.Table.name:
                resources = self._serialize_popular_tables(entities)
        else:
            LOGGER.info(f'User ({user_id}) does not own any "{resource_type}"')

        return resources

    def get_dashboard_by_user_relation(self, *, user_email: str,
                                       relation_type: UserResourceRel) \
            -> Dict[str, List[DashboardSummary]]:
        pass

    def get_table_by_user_relation(self, *, user_email: str,
                                   relation_type: UserResourceRel) -> Dict[str, Any]:
        tables = list()
        if relation_type == UserResourceRel.follow:
            tables = self._get_resources_followed_by_user(
                user_id=user_email, resource_type=ResourceType.Table.name)
        elif relation_type == UserResourceRel.own:
            tables = self._get_resources_owned_by_user(
                user_id=user_email, resource_type=ResourceType.Table.name)

        return {'table': tables}

    def get_frequently_used_tables(self, *, user_email: str) -> Dict[str,
List[PopularTable]]: user = self._driver.entity_unique_attribute( self.USER_TYPE, qualifiedName=user_email).entity readers_guids = [] for user_reads in user['relationshipAttributes'].get('entityReads'): entity_status = user_reads['entityStatus'] relationship_status = user_reads['relationshipStatus'] if entity_status == Status.ACTIVE and relationship_status == Status.ACTIVE: readers_guids.append(user_reads['guid']) readers = extract_entities( self._driver.entity_bulk(guid=readers_guids, ignoreRelationships=True)) _results = {} for reader in readers: entity_uri = reader.attributes.get(self.ENTITY_URI_KEY) count = reader.attributes.get('count') if count: details = self._extract_info_from_uri(table_uri=entity_uri) _results[count] = dict(cluster=details.get('cluster'), name=details.get('name'), schema=details.get('db'), database=details.get('entity')) sorted_counts = sorted(_results.keys()) results = [] for count in sorted_counts: data: dict = _results.get(count, dict()) table = PopularTable(**data) results.append(table) return {'table': results} def add_resource_relation_by_user(self, *, id: str, user_id: str, relation_type: UserResourceRel, resource_type: ResourceType) -> None: if resource_type is not ResourceType.Table: raise NotImplemented( 'resource type {} is not supported'.format(resource_type)) entity = self._get_bookmark_entity(entity_uri=id, user_id=user_id) entity.entity[self.ATTRS_KEY][self.BOOKMARK_ACTIVE_KEY] = True entity.update() def delete_resource_relation_by_user(self, *, id: str, user_id: str, relation_type: UserResourceRel, resource_type: ResourceType) -> None: if resource_type is not ResourceType.Table: raise NotImplemented( 'resource type {} is not supported'.format(resource_type)) entity = self._get_bookmark_entity(entity_uri=id, user_id=user_id) entity.entity[self.ATTRS_KEY][self.BOOKMARK_ACTIVE_KEY] = False entity.update() def _parse_date(self, date: int) -> Optional[int]: try: date_str = str(date) date_trimmed = date_str[:10] assert len(date_trimmed) == 10 return int(date_trimmed) except Exception: return None def _get_readers(self, qualified_name: str, top: Optional[int] = 15) -> List[Reader]: params = { 'typeName': self.READER_TYPE, 'offset': '0', 'limit': top, 'excludeDeletedEntities': True, 'entityFilters': { 'condition': 'AND', 'criterion': [{ 'attributeName': self.QN_KEY, 'operator': 'STARTSWITH', 'attributeValue': qualified_name.split('@')[0] + '.' 
}, { 'attributeName': 'count', 'operator': 'gte', 'attributeValue': f'{app.config["POPULAR_TABLE_MINIMUM_READER_COUNT"]}' }] }, 'attributes': ['count', self.QN_KEY], 'sortBy': 'count', 'sortOrder': 'DESCENDING' } search_results = self._driver.search_basic.create( data=params, ignoreRelationships=False) readers = [] for record in search_results.entities: readers.append(record.guid) results = [] if readers: read_entities = extract_entities( self._driver.entity_bulk(guid=readers, ignoreRelationships=False)) for read_entity in read_entities: reader_qn = read_entity.relationshipAttributes['user'][ 'displayText'] reader_details = self._get_user_details(reader_qn) reader = Reader(user=User(**reader_details), read_count=read_entity.attributes['count']) results.append(reader) return results def _get_programmatic_descriptions( self, parameters: dict) -> List[ProgrammaticDescription]: programmatic_descriptions: Dict[str, ProgrammaticDescription] = {} for source, text in parameters.items(): use_parameter = True for regex_filter in app.config[ 'PROGRAMMATIC_DESCRIPTIONS_EXCLUDE_FILTERS']: pattern = re.compile(regex_filter) if pattern.match(source): use_parameter = False break if use_parameter: source = re.sub("([a-z])([A-Z])", "\g<1> \g<2>", source).lower() programmatic_descriptions[source] = ProgrammaticDescription( source=source, text=text) result = dict(sorted(programmatic_descriptions.items())) return list(result.values()) def get_dashboard( self, dashboard_uri: str, ) -> DashboardDetailEntity: pass def get_dashboard_description(self, *, id: str) -> Description: pass def put_dashboard_description(self, *, id: str, description: str) -> None: pass def get_resources_using_table( self, *, id: str, resource_type: ResourceType) -> Dict[str, List[DashboardSummary]]: return {}
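# A standalone sketch (not part of the proxy class above) of the source-name
# normalization used in _get_programmatic_descriptions: camelCase keys are
# split into words and lowercased before becoming description sources.
import re


def normalize_source(source: str) -> str:
    return re.sub(r"([a-z])([A-Z])", r"\g<1> \g<2>", source).lower()


assert normalize_source('hiveMetastore') == 'hive metastore'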
from datetime import datetime

from beaker.cache import CacheManager
from beaker.util import parse_cache_config_options

defaults = {
    'cache.data_dir': './cache',
    'cache.type': 'dbm',
    'cache.expire': 60,
    'cache.regions': 'short_term'
}

cache = CacheManager(**parse_cache_config_options(defaults))


def get_cached_value():
    @cache.region('short_term', 'test_namespacing')
    def get_value():
        return datetime.now()

    return get_value()
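# A minimal, self-contained sketch (mirroring the invalidation call seen in
# the bot snippet further below) of dropping a decorator-cached entry with
# CacheManager.invalidate; the namespace string must match the decorator's.
from datetime import datetime

from beaker.cache import CacheManager
from beaker.util import parse_cache_config_options

cache = CacheManager(**parse_cache_config_options({'cache.type': 'memory'}))


@cache.cache('demo_ns', expire=60)
def now():
    return datetime.now()


first = now()
assert now() == first             # second call is served from the cache
cache.invalidate(now, 'demo_ns')  # drop the cached entry for this function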
from beaker.cache import CacheManager
from beaker.util import parse_cache_config_options

cache = CacheManager(
    **parse_cache_config_options({
        'cache.type': 'memory',
        'cache.lock_dir': '/tmp/cache/lock'
    }))
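# A minimal, self-contained sketch of beaker's lower-level API: fetch a named
# cache from the manager, populate it via createfunc, then evict one key.
from beaker.cache import CacheManager
from beaker.util import parse_cache_config_options

cache = CacheManager(**parse_cache_config_options({'cache.type': 'memory'}))
page_cache = cache.get_cache('pages', expire=3600)


def build_page():
    return 'expensive result'


value = page_cache.get(key='page-1', createfunc=build_page)  # computed once
assert value == 'expensive result'
page_cache.remove_value(key='page-1')  # explicit eviction of a single key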
class Resources:
    BOOK_URL = "https://www.packtpub.com/packt/offers/free-learning"
    HEADERS = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/51.0.2704.79 Safari/537.36"
    }
    _BOOK_URL_OFFERS = "https://services.packtpub.com/free-learning-v1/offers"
    _BOOK_URL_SUMMARY = "https://static.packt-cdn.com/products/{}/summary"

    # Configure the cache
    cache = CacheManager(
        **parse_cache_config_options({'cache.type': 'memory'}))

    def __init__(self, config: "BotConfig", *, init_db: bool = True):
        self.config = config

        if init_db:
            initialize_database(**config.database)

        # Create the delegate method based on the configured source
        if 'meetup' in config.events_source:
            self.generate_events = self.meetup_events
        else:
            self.generate_events = self.facebook_events

    @cache.cache('get_events', expire=60)
    def get_events(self, list_size=5):
        return list(self.generate_events(list_size))

    def meetup_events(self, n):
        """Fetch events from Meetup."""
        url = 'https://secure.meetup.com/oauth2/access'
        data = {
            'client_id': self.config.meetup_client_id,
            'client_secret': self.config.meetup_client_secret,
            'grant_type': 'refresh_token',
            'refresh_token': self.config.meetup_refresh_token,
        }
        token = requests.post(url, data=data).json()['access_token']

        all_events = []
        for group in self.config.group_name:
            url = "https://api.meetup.com/{group}/events".format(group=group)
            params = {
                'access_token': token,
                'has_ended': False,
                'page': n,  # limit to n events
            }

            # Response for the events
            r = requests.get(url, params=params)

            # API output
            events = r.json()

            for event in events:
                event = event.copy()
                # Convert the time returned by the Meetup API
                event['time'] = datetime.datetime.fromtimestamp(
                    event['time'] / 1000, tz=util.AJU_TZ)
                # Shorten the URL!
                event['link'] = self.get_short_url(event['link'])
                all_events.append(event)

        return sorted(all_events, key=lambda x: x['time'])

    def facebook_events(self, n):
        """Fetch events from Facebook."""
        all_events = []
        for group in self.config.group_name:
            # API v2.8 base URL
            url = "https://graph.facebook.com/v2.8/%s/events" % group

            # Response for the events
            r = requests.get(
                url,
                params={
                    'access_token': self.config.facebook_key,
                    'since': 'today',
                    'fields': 'name,start_time',  # filter response to these fields
                    'limit': n,  # limit to n events
                })

            # API output
            events = r.json().get('data', [])

            for event in events:
                event = event.copy()
                # Convert the time returned by the Facebook API
                event['time'] = datetime.datetime.strptime(
                    event.pop('start_time'), "%Y-%m-%dT%H:%M:%S%z")
                # Create the event link
                link = "https://www.facebook.com/events/%s" % event.pop('id')
                # Shorten the URL!
                event['link'] = self.get_short_url(link)
                all_events.append(event)

        return sorted(all_events, key=lambda x: x['time'])

    @cache.cache('get_discounts', expire=7200)
    def get_discounts(self):
        """discountsglobal may block the requests.
        Comment out its line below if that happens.
        """
        # List of collector functions
        site_functions = [
            self.__get_all_onlinetutorials_links,
            self.__get_all_discountsglobal_links,
            self.__get_all_learnviral_links,
        ]

        # Dict that receives the results from the threads
        self.__coupon_results = {}

        thread_list = []
        for f in site_functions:
            thread = threading.Thread(target=f)
            thread.start()
            thread_list.append(thread)
        for thread in thread_list:
            thread.join()

        # Drop duplicate coupons and entries without a discount
        coupons_dict = {}
        for url, name in self.__coupon_results.items():
            if ('https://www.udemy.com/course/' not in url
                    or '?couponCode=' not in url):
                # No discount attached
                continue
            coupons_dict[url.strip()] = name.strip()
        del self.__coupon_results

        return coupons_dict

    # Collector function 1
    def __get_all_discountsglobal_links(self):
        url = "http://udemycoupon.discountsglobal.com/coupon-category/free-2/"
        try:
            r = requests.get(url, headers=self.HEADERS)
            soup = BeautifulSoup(r.text, 'html5lib')
            for div in soup.findAll('div', {'class': 'item-panel'})[:7]:
                name = div.find('h3').find('a').text
                name = name.replace('Discount: 100% off – ', '')
                name = name.replace('Discount: 75% off – ', '')
                name = name.replace('100% off ', '')
                url = div.find('div', {
                    'class': 'link-holder'
                }).find('a').get('href')
                self.__coupon_results.update({url: name})
        except Exception as e:
            print('get_all_discountsglobal_links', e)

    # Collector function 2
    def __get_all_learnviral_links(self):
        url = "https://udemycoupon.learnviral.com/coupon-category/free100-discount/"
        try:
            r = requests.get(url, headers=self.HEADERS)
            soup = BeautifulSoup(r.text, 'html5lib')
            titles = [
                title.text.replace('[Free]', '')
                for title in soup.findAll('h3', {'class': 'entry-title'})
            ]
            urls = [
                a.get('href') for a in
                soup.findAll('a', {'class': 'coupon-code-link btn promotion'})
            ]
            self.__coupon_results.update(
                {url: name for (url, name) in zip(urls[:7], titles[:7])})
        except Exception as e:
            print('get_all_learnviral_links', e)

    # Collector function 3
    def __get_all_onlinetutorials_links(self):
        url = "https://onlinetutorials.org"
        try:
            r = requests.get(url, headers=self.HEADERS)
            soup = BeautifulSoup(r.text, 'html5lib')
            titles = [
                title.find('a').text
                for title in soup.findAll('h3', {'class': 'entry-title'})
            ]
            urls = [
                a.get('href') for a in
                soup.findAll('a', {'class': 'coupon-code-link button promotion'})
            ]
            self.__coupon_results.update(
                {url: name for (url, name) in zip(urls[:7], titles[:7])})
        except Exception as e:
            print('get_all_onlinetutorials_links', e)

    @cache.cache('get_packt_free_book', expire=600)
    def get_packt_free_book(self):
        date_from = datetime.datetime.utcnow().date()
        date_to = date_from + datetime.timedelta(days=1)

        # The first request fetches the ID of today's book
        r = requests.get(
            url=self._BOOK_URL_OFFERS,
            params={
                "dateFrom": date_from.strftime("%Y-%m-%dT00:00:00.000Z"),
                "dateTo": date_to.strftime("%Y-%m-%dT00:00:00.000Z")
            },
        )
        book_id = r.json()['data'][0]['productId']

        # The second request fetches the details of today's book
        r = requests.get(url=self._BOOK_URL_SUMMARY.format(book_id))
        data = r.json()

        book = util.AttributeDict()
        book['name'] = data['title']
        book['summary'] = data['oneLiner']
        book['cover'] = data['coverImage']
        book['expires'] = datetime.datetime.combine(
            date_to, datetime.time.min).replace(tzinfo=util.UTC_TZ).timestamp()
        return book

    @cache.cache('get_short_url')
    def get_short_url(self, long_url):
        # Only request the short URL if an API key is configured
        if self.config.url_shortener_key:
            r = requests.post("https://www.googleapis.com/urlshortener/v1/url",
                              params={
                                  'key': self.config.url_shortener_key,
                                  'fields': 'id'
                              },
                              json={'longUrl': long_url})
            if r.status_code == 200:
                return r.json()['id']
            else:
                logging.exception(r.text)

        # If anything went wrong, fall back to the long URL itself
        return long_url

    ChatState = dict

    @orm.db_session
    def set_state(self, state_id: str, chat_id: int, chat_state: ChatState):
        # Avoid dumping memory-only state
        chat_state = chat_state.copy()
        chat_state.pop('__memory__', None)
        try:
            state = State[chat_id, state_id]
            info = json_decode(state.info) if state.info else {}
            info.update(chat_state)
            state.info = json_encode(info)
        except orm.ObjectNotFound:
            State(telegram_id=chat_id,
                  description=state_id,
                  info=json_encode(chat_state))

    @orm.db_session
    def get_state(self, state_id: str, chat_id: int) -> ChatState:
        state = State.get(telegram_id=chat_id, description=state_id)
        if state:
            return json_decode(state.info)
        return {}

    @orm.db_session
    def update_states(self, states: Dict[str, Dict[int, ChatState]]):
        for state_id, data in states.items():
            for chat_id, chat_state in data.items():
                self.set_state(state_id, chat_id, chat_state)

    @orm.db_session
    def load_states(self) -> Dict[str, Dict[int, ChatState]]:
        states = MissingDict(
            lambda state_id: MissingDict(lambda chat_id: self.__state_dict(
                state_id, chat_id, self.get_state(state_id, chat_id))))

        for state in State.select():
            state_id, chat_id, info = state.description, state.telegram_id, state.info
            states[state_id][chat_id] = self.__state_dict(
                state_id, chat_id, json_decode(info))
        return states

    def __state_dict(self, state_id, chat_id, data):
        # Reserve a memory-only key
        if '__memory__' not in data:
            data['__memory__'] = {}
        return StateDict(data,
                         dump_function=lambda state: self.set_state(
                             state_id, chat_id, state))

    @cache.cache('db.get_group', expire=600)
    @orm.db_session
    def get_group(self, group_id: int, group_name: str) -> Group:
        return self.__get_group(group_id, group_name)

    def __get_group(self, group_id, group_name):
        try:
            return Group[group_id]
        except orm.ObjectNotFound:
            return Group(telegram_id=group_id, telegram_groupname=group_name)

    @orm.db_session
    def set_group(self, group_id: int, group_name: str, **kwargs):
        if not kwargs:
            return
        group = self.__get_group(group_id, group_name)
        for k, v in kwargs.items():
            setattr(group, k, v)
        self.cache.invalidate(self.get_group, "db.get_group")

    @orm.db_session
    def log_message(self, msg: "telegram.Message", *args, **kwargs):
        try:
            user = User[msg.from_user.id]
        except orm.ObjectNotFound:
            user = User(
                telegram_id=msg.from_user.id,
                telegram_username=msg.from_user.name,
            )
        message = Message(
            sent_by=user,
            text=msg.text,
            sent_at=msg.date,
        )
        print('Logging message: {}'.format(message))

    @orm.db_session
    def list_all_users(self) -> Tuple[User, ...]:
        users = User.select().order_by(User.telegram_username)[:]
        return tuple(users)

    @orm.db_session
    def is_user_admin(self, user_id):
        try:
            user = User[user_id]
        except orm.ObjectNotFound:
            return False
        return user.is_bot_admin
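# Self-contained sketch of the thread fan-out/join pattern used by
# get_discounts above: run independent collectors concurrently and merge
# their results into one shared dict (names here are illustrative).
import threading

results = {}
lock = threading.Lock()


def collector(site: str) -> None:
    data = {f'https://example.org/{site}': site}  # stand-in for scraping
    with lock:  # guard the shared dict across threads
        results.update(data)


threads = [threading.Thread(target=collector, args=(s,))
           for s in ('site-a', 'site-b', 'site-c')]
for t in threads:
    t.start()
for t in threads:
    t.join()

assert len(results) == 3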
from collections import defaultdict
from typing import Sequence, Tuple, Dict

from beaker.cache import CacheManager
from beaker.util import parse_cache_config_options

import importer
import ratt
import velo
import boats

cache_opts = {'cache.type': 'memory', 'cache.lock_dir': 'cache/lock'}
transient = CacheManager(**parse_cache_config_options(cache_opts))

cache_opts = {
    'cache.type': 'file',
    'cache.data_dir': 'cache/data',
    'cache.lock_dir': 'cache/lock'
}
persistent = CacheManager(**parse_cache_config_options(cache_opts))

known_stations_csv = "Lines Stations and Junctions - Timisoara Public Transport - Denumiri-20152012.csv"
known_lines_csv = "Timisoara Public Transport - Linii.csv"


@transient.cache('all_stations', expire=3600 * 24)
def get_stations() -> Dict[str, ratt.Station]:
    raw_name_to_station = {
        station.raw_name: station
        for station in importer.parse_stations_from_csv(known_stations_csv)
    }
    return raw_name_to_station
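# Illustrative restatement (same options as above, hypothetical functions) of
# why two managers coexist: volatile lookups go through the in-memory
# manager, while slow-changing data uses the file-backed one and so survives
# process restarts.
from beaker.cache import CacheManager
from beaker.util import parse_cache_config_options

demo_transient = CacheManager(**parse_cache_config_options(
    {'cache.type': 'memory', 'cache.lock_dir': 'cache/lock'}))
demo_persistent = CacheManager(**parse_cache_config_options(
    {'cache.type': 'file', 'cache.data_dir': 'cache/data',
     'cache.lock_dir': 'cache/lock'}))


@demo_transient.cache('arrivals', expire=30)  # refreshed every 30 seconds
def get_arrivals(station_id: str) -> list:
    return []  # placeholder for a live-API call


@demo_persistent.cache('timetable', expire=3600 * 24)  # one day, on disk
def get_timetable(line: str) -> dict:
    return {}  # placeholder for a CSV import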
"""Contains file_cache and mem_cache beaker caches.""" from beaker.cache import CacheManager # noinspection PyProtectedMember from beaker.util import parse_cache_config_options # todo replace beaker with bolton caches? This is unfriendly and overkill. ENABLE = False _file_cache_opts = { "cache.type": "file", "cache.data_dir": "infiltrate/data/Beaker/tmp/cache/data", "cache.lock_dir": "infiltrate/data/Beaker/tmp/cache/lock", "enabled": ENABLE, } file_cache = CacheManager(**parse_cache_config_options(_file_cache_opts)) _mem_cache_opts = {"cache.type": "memory", "enabled": ENABLE} mem_cache = CacheManager(**parse_cache_config_options(_mem_cache_opts)) def invalidate(): """Kill all the caches. Used after major updates to depended data, like the card list.""" from beaker.cache import cache_managers for _cache in cache_managers.values(): _cache.clear()
class AtlasProxy(BaseProxy):
    """
    Atlas Proxy client for the amundsen metadata
    {ATLAS_API_DOCS} = https://atlas.apache.org/api/v2/
    """
    TABLE_ENTITY = app.config['ATLAS_TABLE_ENTITY']
    DB_ATTRIBUTE = app.config['ATLAS_DB_ATTRIBUTE']
    STATISTICS_FORMAT_SPEC = app.config['STATISTICS_FORMAT_SPEC']
    BOOKMARK_TYPE = 'Bookmark'
    USER_TYPE = 'User'
    READER_TYPE = 'Reader'
    QN_KEY = 'qualifiedName'
    BOOKMARK_ACTIVE_KEY = 'active'
    ENTITY_ACTIVE_STATUS = 'ACTIVE'
    GUID_KEY = 'guid'
    ATTRS_KEY = 'attributes'
    REL_ATTRS_KEY = 'relationshipAttributes'
    ENTITY_URI_KEY = 'entityUri'

    _CACHE = CacheManager(**parse_cache_config_options({
        'cache.regions': 'atlas_proxy',
        'cache.atlas_proxy.type': 'memory',
        'cache.atlas_proxy.expire': _ATLAS_PROXY_CACHE_EXPIRY_SEC
    }))

    def __init__(self, *,
                 host: str,
                 port: int,
                 user: str = 'admin',
                 password: str = '',
                 encrypted: bool = False,
                 validate_ssl: bool = False) -> None:
        """
        Initiate the Apache Atlas client with the provided credentials
        """
        protocol = 'https' if encrypted else 'http'
        self._driver = Atlas(host=host,
                             port=port,
                             username=user,
                             password=password,
                             protocol=protocol,
                             validate_ssl=validate_ssl)

    def _get_ids_from_basic_search(self, *, params: Dict) -> List[str]:
        """
        FixMe (Verdan): UNUSED. Please remove after implementing atlas proxy
        Search for the entities based on the params provided as argument.
        :param params: the dictionary of parameters to be used for the
        basic search
        :return: The flat list of GUIDs of entities found based on the params.
        """
        ids = list()
        search_results = self._driver.search_basic(**params)
        for result in search_results:
            for entity in result.entities:
                ids.append(entity.guid)
        return ids

    def _get_flat_values_from_dsl(self, dsl_param: dict) -> List:
        """
        Makes a DSL query asking for a specific attribute, extracts that
        attribute from the result (which is a list of lists) and converts it
        into a flat list.
        :param dsl_param: A DSL parameter, with a SELECT clause
        :return: A flat list of the attributes specified in the SELECT clause
        """
        attributes: List = list()
        _search_collection = self._driver.search_dsl(**dsl_param)
        for collection in _search_collection:
            attributes = collection.flatten_attrs()
        return attributes

    def _extract_info_from_uri(self, *, table_uri: str) -> Dict:
        """
        Extracts the table information from table_uri coming from frontend.
        :param table_uri:
        :return: Dictionary object, containing following information:
        entity: Type of entity example: rdbms_table, hive_table etc.
        cluster: Cluster information
        db: Database Name
        name: Table Name
        """
        pattern = re.compile(r"""
            ^   (?P<entity>.*?)
            :\/\/
                (?P<cluster>.*)
            \.
                (?P<db>.*?)
            \/
                (?P<name>.*?)
            $
        """, re.X)
        result = pattern.match(table_uri)
        return result.groupdict() if result else dict()

    def _parse_reader_qn(self, reader_qn: str) -> Dict:
        """
        Parse reader qualifiedName and extract the info
        :param reader_qn:
        :return: Dictionary object containing following information:
        cluster: cluster information
        db: Database name
        name: Table name
        """
        pattern = re.compile(r"""
            ^(?P<db>[^.]*)
            \.
            (?P<table>[^.]*)
            \.
            (?P<user_id>[^.]*)\.reader
            \@
            (?P<cluster>.*)
            $
        """, re.X)
        result = pattern.match(reader_qn)
        return result.groupdict() if result else dict()

    def _parse_bookmark_qn(self, bookmark_qn: str) -> Dict:
        """
        Parse bookmark qualifiedName and extract the info
        :param bookmark_qn: Qualified Name of Bookmark entity
        :return: Dictionary object containing following information:
        cluster: cluster information
        db: Database name
        name: Table name
        """
        pattern = re.compile(r"""
            ^(?P<db>[^.]*)
            \.
            (?P<table>[^.]*)
            \.
            (?P<entity_type>[^.]*)
            \.
            (?P<user_id>[^.]*)\.bookmark
            \@
            (?P<cluster>.*)
            $
        """, re.X)
        result = pattern.match(bookmark_qn)
        return result.groupdict() if result else dict()

    def _get_table_entity(self, *, table_uri: str) -> EntityUniqueAttribute:
        """
        Fetch information from table_uri and then find the appropriate entity
        The reason we're not returning entity_unique_attribute().entity
        directly is that entity_unique_attribute() returns an entity object
        that can be used for update purposes, while
        entity_unique_attribute().entity only returns the dictionary.
        :param table_uri:
        :return: The table entity matching the qualified name derived from
        table_uri
        """
        table_info = self._extract_info_from_uri(table_uri=table_uri)
        table_qn = make_table_qualified_name(table_info.get('name'),
                                             table_info.get('cluster'),
                                             table_info.get('db'))

        try:
            return self._driver.entity_unique_attribute(table_info['entity'],
                                                        qualifiedName=table_qn)
        except Exception as ex:
            LOGGER.exception(f'Table not found. {str(ex)}')
            raise NotFoundException(
                'Table URI( {table_uri} ) does not exist'.format(
                    table_uri=table_uri))

    def _get_user_entity(self, user_id: str) -> EntityUniqueAttribute:
        """
        Fetches a user entity from an id
        :param user_id:
        :return:
        """
        try:
            return self._driver.entity_unique_attribute("User",
                                                        qualifiedName=user_id)
        except Exception as ex:
            raise NotFoundException(
                '(User {user_id}) does not exist'.format(user_id=user_id))

    def _create_bookmark(self, entity: EntityUniqueAttribute, user_guid: str,
                         bookmark_qn: str, table_uri: str) -> None:
        """
        Creates a bookmark entity for a specific user and table uri.
        :param user_guid: User's guid
        :param bookmark_qn: Bookmark qualifiedName
        :return:
        """
        bookmark_entity = {
            'entity': {
                'typeName': self.BOOKMARK_TYPE,
                'attributes': {
                    'qualifiedName': bookmark_qn,
                    self.BOOKMARK_ACTIVE_KEY: True,
                    'entityUri': table_uri,
                    'user': {'guid': user_guid},
                    'entity': {'guid': entity.entity[self.GUID_KEY]}
                }
            }
        }
        self._driver.entity_post.create(data=bookmark_entity)

    def _get_bookmark_entity(self, entity_uri: str,
                             user_id: str) -> EntityUniqueAttribute:
        """
        Fetch a Bookmark entity from parsing table uri and user id.
        If Bookmark is not present, create one for the user.
        :param entity_uri:
        :param user_id: Qualified Name of a user
        :return:
        """
        table_info = self._extract_info_from_uri(table_uri=entity_uri)
        bookmark_qn = '{}.{}.{}.{}.bookmark@{}'.format(
            table_info.get('db'),
            table_info.get('name'),
            table_info.get('entity'),
            user_id,
            table_info.get('cluster'))

        try:
            bookmark_entity = self._driver.entity_unique_attribute(
                self.BOOKMARK_TYPE, qualifiedName=bookmark_qn)

            if not bookmark_entity.entity:
                table_entity = self._get_table_entity(table_uri=entity_uri)
                # Fetch user entity from user_id for relation
                user_entity = self._get_user_entity(user_id)
                # Create bookmark entity with the user relation.
                self._create_bookmark(table_entity,
                                      user_entity.entity[self.GUID_KEY],
                                      bookmark_qn, entity_uri)
                # Fetch bookmark entity after creating it.
                bookmark_entity = self._driver.entity_unique_attribute(
                    self.BOOKMARK_TYPE, qualifiedName=bookmark_qn)

            return bookmark_entity

        except Exception as ex:
            LOGGER.exception(f'Bookmark not found. {str(ex)}')
            raise NotFoundException(
                'Bookmark( {bookmark_qn} ) does not exist'.format(
                    bookmark_qn=bookmark_qn))

    def _get_column(self, *, table_uri: str, column_name: str) -> Dict:
        """
        Fetch the column information from referredEntities of the table entity
        :param table_uri:
        :param column_name:
        :return: A dictionary containing the column details
        """
        try:
            table_entity = self._get_table_entity(table_uri=table_uri)
            columns = table_entity.entity[self.REL_ATTRS_KEY].get('columns')
            for column in columns or list():
                col_details = table_entity.referredEntities[column[self.GUID_KEY]]
                if column_name == col_details[self.ATTRS_KEY]['name']:
                    return col_details

            raise NotFoundException(f'Column not found: {column_name}')

        except KeyError as ex:
            LOGGER.exception(f'Column not found: {str(ex)}')
            raise NotFoundException(f'Column not found: {column_name}')

    def _serialize_columns(self, *, entity: EntityUniqueAttribute) -> \
            Union[List[Column], List]:
        """
        Helper function to fetch the columns from entity and serialize them
        using the Column and Statistics models.
        :param entity: EntityUniqueAttribute object, along with
        relationshipAttributes
        :return: A list of Column objects, if there are any columns available,
        else an empty list.
        """
        columns = list()
        for column in entity.entity[self.REL_ATTRS_KEY].get('columns') or list():
            column_status = column.get('entityStatus', 'inactive').lower()

            if column_status != 'active':
                continue

            col_entity = entity.referredEntities[column[self.GUID_KEY]]
            col_attrs = col_entity[self.ATTRS_KEY]
            statistics = list()

            for stats in col_attrs.get('statistics') or list():
                stats_attrs = stats['attributes']

                stat_type = stats_attrs.get('stat_name')

                stat_format = self.STATISTICS_FORMAT_SPEC.get(stat_type,
                                                              dict())

                if not stat_format.get('drop', False):
                    stat_type = stat_format.get('new_name', stat_type)

                    stat_val = stats_attrs.get('stat_val')

                    format_val = stat_format.get('format')

                    if format_val:
                        stat_val = format_val.format(stat_val)
                    else:
                        stat_val = str(stat_val)

                    start_epoch = stats_attrs.get('start_epoch')
                    end_epoch = stats_attrs.get('end_epoch')

                    statistics.append(
                        Statistics(
                            stat_type=stat_type,
                            stat_val=stat_val,
                            start_epoch=start_epoch,
                            end_epoch=end_epoch,
                        ))

            columns.append(
                Column(
                    name=col_attrs.get('name'),
                    description=col_attrs.get('description') or col_attrs.get('comment'),
                    col_type=col_attrs.get('type') or col_attrs.get('dataType'),
                    sort_order=col_attrs.get('position') or 9999,
                    stats=statistics,
                ))
        return sorted(columns, key=lambda item: item.sort_order)

    def _get_reports(self, guids: List[str]) -> List[ResourceReport]:
        reports = []
        if guids:
            report_entities_collection = self._driver.entity_bulk(guid=guids)
            for report_entity in extract_entities(report_entities_collection):
                try:
                    if report_entity.status == self.ENTITY_ACTIVE_STATUS:
                        report_attrs = report_entity.attributes
                        reports.append(
                            ResourceReport(name=report_attrs['name'],
                                           url=report_attrs['url']))
                except (KeyError, AttributeError) as ex:
                    LOGGER.exception(
                        'Error while accessing table report: {}. {}'.format(
                            str(report_entity), str(ex)))

        parsed_reports = app.config['RESOURCE_REPORT_CLIENT'](reports) \
            if app.config['RESOURCE_REPORT_CLIENT'] else reports

        return parsed_reports

    def get_user(self, *, id: str) -> Union[UserEntity, None]:
        pass

    def get_users(self) -> List[UserEntity]:
        pass

    def get_table(self, *, table_uri: str) -> Table:
        """
        Gathers all the information needed for the Table Detail Page.
        :param table_uri:
        :return: A Table object with all the information available
        or gathered from different entities.
        """
        entity = self._get_table_entity(table_uri=table_uri)
        table_details = entity.entity

        try:
            attrs = table_details[self.ATTRS_KEY]

            programmatic_descriptions = self._get_programmatic_descriptions(
                attrs.get('parameters'))

            table_qn = parse_table_qualified_name(
                qualified_name=attrs.get(self.QN_KEY))

            tags = []
            # Using or in case, if the key 'classifications' is there with a None
            for classification in table_details.get("classifications") or list():
                tags.append(
                    Tag(tag_name=classification.get('typeName'),
                        tag_type="default"))

            columns = self._serialize_columns(entity=entity)

            reports_guids = [
                report.get("guid") for report in attrs.get("reports") or list()
            ]

            table = Table(
                database=table_details.get('typeName'),
                cluster=table_qn.get('cluster_name', ''),
                schema=table_qn.get('db_name', ''),
                name=attrs.get('name') or table_qn.get("table_name", ''),
                tags=tags,
                description=attrs.get('description') or attrs.get('comment'),
                owners=[User(email=attrs.get('owner'))],
                resource_reports=self._get_reports(guids=reports_guids),
                columns=columns,
                table_readers=self._get_readers(attrs.get(self.QN_KEY)),
                last_updated_timestamp=self._parse_date(
                    table_details.get('updateTime')),
                programmatic_descriptions=programmatic_descriptions)

            return table
        except KeyError as ex:
            LOGGER.exception(
                'Error while accessing table information. {}'.format(str(ex)))
            raise BadRequest(
                'Some of the required attributes '
                'are missing in : ( {table_uri} )'.format(table_uri=table_uri))

    def delete_owner(self, *, table_uri: str, owner: str) -> None:
        pass

    def add_owner(self, *, table_uri: str, owner: str) -> None:
        """
        It simply replaces the owner field in atlas with the new string.
        FixMe (Verdan): Implement multiple data owners and
        atlas changes in the documentation if needed to make owner field a list
        :param table_uri:
        :param owner: Email address of the owner
        :return: None, as it simply adds the owner.
        """
        entity = self._get_table_entity(table_uri=table_uri)
        entity.entity[self.ATTRS_KEY]['owner'] = owner
        entity.update()

    def get_table_description(self, *, table_uri: str) -> Union[str, None]:
        """
        :param table_uri:
        :return: The description of the table as a string
        """
        entity = self._get_table_entity(table_uri=table_uri)
        return entity.entity[self.ATTRS_KEY].get('description')

    def put_table_description(self, *, table_uri: str,
                              description: str) -> None:
        """
        Update the description of the given table.
        :param table_uri:
        :param description: Description string
        :return: None
        """
        entity = self._get_table_entity(table_uri=table_uri)
        entity.entity[self.ATTRS_KEY]['description'] = description
        entity.update()

    def add_tag(self, *, id: str, tag: str, tag_type: str,
                resource_type: ResourceType = ResourceType.Table) -> None:
        """
        Assign the tag/classification to the given table
        API Ref: /resource_EntityREST.html#resource_EntityREST_addClassification_POST
        :param table_uri:
        :param tag: Tag/Classification Name
        :param tag_type:
        :return: None
        """
        entity = self._get_table_entity(table_uri=id)
        entity_bulk_tag = {
            "classification": {
                "typeName": tag
            },
            "entityGuids": [entity.entity[self.GUID_KEY]]
        }
        self._driver.entity_bulk_classification.create(data=entity_bulk_tag)

    def delete_tag(self, *, id: str, tag: str, tag_type: str,
                   resource_type: ResourceType = ResourceType.Table) -> None:
        """
        Delete the assigned classification/tag from the given table
        API Ref: /resource_EntityREST.html#resource_EntityREST_deleteClassification_DELETE
        :param table_uri:
        :param tag:
        :return:
        """
        try:
            entity = self._get_table_entity(table_uri=id)
            guid_entity = self._driver.entity_guid(entity.entity[self.GUID_KEY])
            guid_entity.classifications(tag).delete()
        except Exception as ex:
            # FixMe (Verdan): Too broad exception. Please make it specific
            LOGGER.exception('For some reason this deletes the classification '
                             'but also always return exception. {}'.format(
                                 str(ex)))

    def put_column_description(self, *, table_uri: str, column_name: str,
                               description: str) -> None:
        """
        :param table_uri:
        :param column_name: Name of the column to update the description
        :param description: The description string
        :return: None, as it simply updates the description of a column
        """
        column_detail = self._get_column(table_uri=table_uri,
                                         column_name=column_name)
        col_guid = column_detail[self.GUID_KEY]

        entity = self._driver.entity_guid(col_guid)
        entity.entity[self.ATTRS_KEY]['description'] = description
        entity.update(attribute='description')

    def get_column_description(self, *, table_uri: str,
                               column_name: str) -> Union[str, None]:
        """
        :param table_uri:
        :param column_name:
        :return: The column description using the referredEntities
        information of a table entity
        """
        column_detail = self._get_column(table_uri=table_uri,
                                         column_name=column_name)
        return column_detail[self.ATTRS_KEY].get('description')

    def get_popular_tables(self, *, num_entries: int) -> List[PopularTable]:
        """
        :param num_entries: Number of popular tables to fetch
        :return: A list of PopularTable instances
        """
        popular_tables = list()
        popular_query_params = {
            'typeName': 'Table',
            'sortBy': 'popularityScore',
            'sortOrder': 'DESCENDING',
            'excludeDeletedEntities': True,
            'limit': num_entries
        }
        search_results = self._driver.search_basic.create(
            data=popular_query_params)

        for table in search_results.entities:
            table_attrs = table.attributes

            table_qn = parse_table_qualified_name(
                qualified_name=table_attrs.get(self.QN_KEY))

            table_name = table_qn.get("table_name") or table_attrs.get('name')
            db_name = table_qn.get("db_name", '')
            db_cluster = table_qn.get("cluster_name", '')

            popular_table = PopularTable(
                database=table.typeName,
                cluster=db_cluster,
                schema=db_name,
                name=table_name,
                description=table_attrs.get('description')
                or table_attrs.get('comment'))
            popular_tables.append(popular_table)
        return popular_tables

    def get_latest_updated_ts(self) -> int:
        pass

    def get_tags(self) -> List:
        """
        Fetch all the classification entity definitions from Atlas, as these
        are used to generate the autocomplete on the table detail page.
        :return: A list of TagDetail objects
        """
        tags = []
        for metrics in self._driver.admin_metrics:
            tag_stats = metrics.tag
            for tag, count in tag_stats["tagEntities"].items():
                tags.append(TagDetail(tag_name=tag, tag_count=count))
        return tags

    def get_dashboard_by_user_relation(self, *, user_email: str,
                                       relation_type: UserResourceRel) \
            -> Dict[str, List[DashboardSummary]]:
        pass

    def get_table_by_user_relation(
            self, *, user_email: str,
            relation_type: UserResourceRel) -> Dict[str, Any]:
        params = {
            'typeName': self.BOOKMARK_TYPE,
            'offset': '0',
            'limit': '1000',
            'excludeDeletedEntities': True,
            'entityFilters': {
                'condition': 'AND',
                'criterion': [{
                    'attributeName': self.QN_KEY,
                    'operator': 'contains',
                    'attributeValue': f'.{user_email}.bookmark'
                }, {
                    'attributeName': self.BOOKMARK_ACTIVE_KEY,
                    'operator': 'eq',
                    'attributeValue': 'true'
                }]
            },
            'attributes': ['count', self.QN_KEY, self.ENTITY_URI_KEY]
        }
        # Fetches the bookmark entities based on filters
        search_results = self._driver.search_basic.create(data=params)

        results = []
        for record in search_results.entities:
            table_info = self._extract_info_from_uri(
                table_uri=record.attributes[self.ENTITY_URI_KEY])
            res = self._parse_bookmark_qn(record.attributes[self.QN_KEY])
            results.append(
                PopularTable(database=table_info['entity'],
                             cluster=res['cluster'],
                             schema=res['db'],
                             name=res['table']))
        return {'table': results}

    def get_frequently_used_tables(
            self, *, user_email: str) -> Dict[str, List[PopularTable]]:
        user = self._driver.entity_unique_attribute(
            self.USER_TYPE, qualifiedName=user_email).entity

        readers_guids = []
        for user_reads in user['relationshipAttributes'].get('entityReads'):
            entity_status = user_reads['entityStatus']
            relationship_status = user_reads['relationshipStatus']

            if entity_status == 'ACTIVE' and relationship_status == 'ACTIVE':
                readers_guids.append(user_reads['guid'])

        readers = extract_entities(
            self._driver.entity_bulk(guid=readers_guids,
                                     ignoreRelationships=True))

        _results = {}
        for reader in readers:
            entity_uri = reader.attributes.get(self.ENTITY_URI_KEY)
            count = reader.attributes.get('count')

            if count:
                details = self._extract_info_from_uri(table_uri=entity_uri)
                _results[count] = dict(cluster=details.get('cluster'),
                                       name=details.get('name'),
                                       schema=details.get('db'),
                                       database=details.get('entity'))

        sorted_counts = sorted(_results.keys())

        results = []
        for count in sorted_counts:
            data: dict = _results.get(count, dict())
            table = PopularTable(**data)
            results.append(table)

        return {'table': results}

    def add_resource_relation_by_user(self, *, id: str, user_id: str,
                                      relation_type: UserResourceRel,
                                      resource_type: ResourceType) -> None:
        if resource_type is not ResourceType.Table:
            raise NotImplementedError(
                'resource type {} is not supported'.format(resource_type))

        self._add_table_relation_by_user(table_uri=id,
                                         user_email=user_id,
                                         relation_type=relation_type)

    def _add_table_relation_by_user(self, *, table_uri: str, user_email: str,
                                    relation_type: UserResourceRel) -> None:
        entity = self._get_bookmark_entity(entity_uri=table_uri,
                                           user_id=user_email)
        entity.entity[self.ATTRS_KEY][self.BOOKMARK_ACTIVE_KEY] = True
        entity.update()

    def delete_resource_relation_by_user(self, *, id: str, user_id: str,
                                         relation_type: UserResourceRel,
                                         resource_type: ResourceType) -> None:
        if resource_type is not ResourceType.Table:
            raise NotImplementedError(
                'resource type {} is not supported'.format(resource_type))

        self._delete_table_relation_by_user(table_uri=id,
                                            user_email=user_id,
                                            relation_type=relation_type)

    def _delete_table_relation_by_user(self, *, table_uri: str,
                                       user_email: str,
                                       relation_type: UserResourceRel) -> None:
        entity = self._get_bookmark_entity(entity_uri=table_uri,
                                           user_id=user_email)
        entity.entity[self.ATTRS_KEY][self.BOOKMARK_ACTIVE_KEY] = False
        entity.update()

    def _parse_date(self, date: int) -> Optional[int]:
        try:
            date_str = str(date)
            date_trimmed = date_str[:10]
            assert len(date_trimmed) == 10
            return int(date_trimmed)
        except Exception:
            return None

    def _get_readers(self, qualified_name: str,
                     top: Optional[int] = 15) -> List[Reader]:
        params = {
            'typeName': self.READER_TYPE,
            'offset': '0',
            'limit': top,
            'excludeDeletedEntities': True,
            'entityFilters': {
                'condition': 'AND',
                'criterion': [{
                    'attributeName': self.QN_KEY,
                    'operator': 'STARTSWITH',
                    'attributeValue': qualified_name.split('@')[0] + '.'
                }, {
                    'attributeName': 'count',
                    'operator': 'gte',
                    'attributeValue': f'{app.config["POPULAR_TABLE_MINIMUM_READER_COUNT"]}'
                }]
            },
            'attributes': ['count', self.QN_KEY],
            'sortBy': 'count',
            'sortOrder': 'DESCENDING'
        }

        search_results = self._driver.search_basic.create(
            data=params, ignoreRelationships=False)

        readers = []
        for record in search_results.entities:
            readers.append(record.guid)

        results = []
        if readers:
            read_entities = extract_entities(
                self._driver.entity_bulk(guid=readers,
                                         ignoreRelationships=False))
            for read_entity in read_entities:
                reader_qn = read_entity.relationshipAttributes['user'][
                    'displayText']
                reader = Reader(user=User(email=reader_qn, user_id=reader_qn),
                                read_count=read_entity.attributes['count'])
                results.append(reader)

        return results

    def _get_programmatic_descriptions(
            self, parameters: dict) -> List[ProgrammaticDescription]:
        programmatic_descriptions: Dict[str, ProgrammaticDescription] = {}

        for source, text in parameters.items():
            use_parameter = True
            for regex_filter in app.config[
                    'PROGRAMMATIC_DESCRIPTIONS_EXCLUDE_FILTERS']:
                pattern = re.compile(regex_filter)
                if pattern.match(source):
                    use_parameter = False
                    break

            if use_parameter:
                source = re.sub(r"([a-z])([A-Z])", r"\g<1> \g<2>", source).lower()
                programmatic_descriptions[source] = ProgrammaticDescription(
                    source=source, text=text)

        result = dict(sorted(programmatic_descriptions.items()))
        return list(result.values())

    def get_dashboard(self, dashboard_uri: str) -> DashboardDetailEntity:
        pass

    def get_dashboard_description(self, *, id: str) -> Description:
        pass

    def put_dashboard_description(self, *, id: str, description: str) -> None:
        pass

    def get_resources_using_table(
            self, *, id: str,
            resource_type: ResourceType) -> Dict[str, List[DashboardSummary]]:
        return {}
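# Standalone check of the bookmark qualified-name pattern parsed by
# _parse_bookmark_qn above; the sample value is illustrative.
import re

BOOKMARK_QN = re.compile(r"""
    ^(?P<db>[^.]*)
    \.
    (?P<table>[^.]*)
    \.
    (?P<entity_type>[^.]*)
    \.
    (?P<user_id>[^.]*)\.bookmark
    @
    (?P<cluster>.*)
    $
""", re.X)

m = BOOKMARK_QN.match('sales.orders.hive_table.jdoe.bookmark@gold')
assert m and m.groupdict() == {
    'db': 'sales', 'table': 'orders', 'entity_type': 'hive_table',
    'user_id': 'jdoe', 'cluster': 'gold'}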
import functools
import logging

from beaker.cache import CacheManager
from beaker.util import parse_cache_config_options

from event_processor.util.switchable_decorator import SwitchableDecorator
from event_processor.config import config

cache = CacheManager(
    **parse_cache_config_options({
        'cache.type': 'file',
        'cache.data_dir': '/tmp/beaker/data',
        'cache.lock_dir': '/tmp/beaker/lock'
    }))


def try_cache(target):
    """Attempt to return a cached result for the target callable, falling
    back to a direct call if the cache layer raises."""

    def try_call(*args, **kwargs):
        try:
            return cache.cache(
                'web_call',
                expire=config.api_cache_expiration)(target)(*args, **kwargs)
        except Exception as e:
            logging.getLogger('scrapy').warning(
                'Exception while calling cache: ' + str(e))
            return target(*args, **kwargs)

    return try_call


cache_call = SwitchableDecorator(try_cache, config.enable_api_cache)
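# Self-contained sketch of the same fallback idea without the project-local
# SwitchableDecorator: try the beaker cache, and degrade to a direct call if
# the cache layer raises (names here are illustrative).
import logging

from beaker.cache import CacheManager
from beaker.util import parse_cache_config_options

cache = CacheManager(**parse_cache_config_options({'cache.type': 'memory'}))


def cached_with_fallback(target):
    def wrapper(*args, **kwargs):
        try:
            return cache.cache('web_call', expire=300)(target)(*args, **kwargs)
        except Exception as exc:
            logging.warning('cache bypassed: %s', exc)
            return target(*args, **kwargs)
    return wrapper


@cached_with_fallback
def fetch(url: str) -> str:
    return f'body of {url}'  # stand-in for a real HTTP request


assert fetch('https://example.org') == 'body of https://example.org'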
def __init__(self, config):
    self.cache = CacheManager(**parse_cache_config_options(config))
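# Hedged sketch of the kind of config dict such a Globals variant expects;
# the keys follow beaker's 'cache.*' convention and the values here are
# purely illustrative.
from beaker.cache import CacheManager
from beaker.util import parse_cache_config_options

example_config = {
    'cache.type': 'file',
    'cache.data_dir': '/tmp/app/cache/data',
    'cache.lock_dir': '/tmp/app/cache/lock',
    'cache.expire': 300,
}
app_cache = CacheManager(**parse_cache_config_options(example_config))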
class AtlasProxy(BaseProxy):
    """
    Atlas Proxy client for the amundsen metadata
    {ATLAS_API_DOCS} = https://atlas.apache.org/api/v2/
    """
    TABLE_ENTITY = app.config['ATLAS_TABLE_ENTITY']
    DB_ATTRIBUTE = app.config['ATLAS_DB_ATTRIBUTE']
    READER_TYPE = 'Reader'
    QN_KEY = 'qualifiedName'
    BKMARKS_KEY = 'isFollowing'
    METADATA_KEY = 'metadata'
    GUID_KEY = 'guid'
    ATTRS_KEY = 'attributes'
    REL_ATTRS_KEY = 'relationshipAttributes'
    ENTITY_URI_KEY = 'entityUri'

    _CACHE = CacheManager(**parse_cache_config_options({
        'cache.regions': 'atlas_proxy',
        'cache.atlas_proxy.type': 'memory',
        'cache.atlas_proxy.expire': _ATLAS_PROXY_CACHE_EXPIRY_SEC
    }))

    def __init__(self, *,
                 host: str,
                 port: int,
                 user: str = 'admin',
                 password: str = '') -> None:
        """
        Initiate the Apache Atlas client with the provided credentials
        """
        self._driver = Atlas(host=host, port=port,
                             username=user, password=password)

    def _get_ids_from_basic_search(self, *, params: Dict) -> List[str]:
        """
        FixMe (Verdan): UNUSED. Please remove after implementing atlas proxy
        Search for the entities based on the params provided as argument.
        :param params: the dictionary of parameters to be used for the
        basic search
        :return: The flat list of GUIDs of entities found based on the params.
        """
        ids = list()
        search_results = self._driver.search_basic(**params)
        for result in search_results:
            for entity in result.entities:
                ids.append(entity.guid)
        return ids

    def _get_flat_values_from_dsl(self, dsl_param: dict) -> List:
        """
        Makes a DSL query asking for a specific attribute, extracts that
        attribute from the result (which is a list of lists) and converts it
        into a flat list.
        :param dsl_param: A DSL parameter, with a SELECT clause
        :return: A flat list of the attributes specified in the SELECT clause
        """
        attributes: List = list()
        _search_collection = self._driver.search_dsl(**dsl_param)
        for collection in _search_collection:
            attributes = collection.flatten_attrs()
        return attributes

    def _extract_info_from_uri(self, *, table_uri: str) -> Dict:
        """
        Extracts the table information from table_uri coming from frontend.
        :param table_uri:
        :return: Dictionary object, containing following information:
        entity: Type of entity example: rdbms_table, hive_table etc.
        cluster: Cluster information
        db: Database Name
        name: Table Name
        """
        pattern = re.compile(r"""
            ^   (?P<entity>.*?)
            :\/\/
                (?P<cluster>.*)
            \.
                (?P<db>.*?)
            \/
                (?P<name>.*?)
            $
        """, re.X)
        result = pattern.match(table_uri)
        return result.groupdict() if result else dict()

    def _parse_reader_qn(self, reader_qn: str) -> Dict:
        """
        Parse reader qualifiedName and extract the info
        :param reader_qn:
        :return: Dictionary object containing following information:
        cluster: cluster information
        db: Database name
        name: Table name
        """
        pattern = re.compile(r"""
            ^(?P<db>[^.]*)
            \.
            (?P<table>[^.]*)\.metadata
            \.
            (?P<user_id>[^.]*)\.reader
            \@
            (?P<cluster>.*)
            $
        """, re.X)
        result = pattern.match(reader_qn)
        return result.groupdict() if result else dict()

    def _get_table_entity(self, *, table_uri: str) \
            -> Tuple[EntityUniqueAttribute, Dict]:
        """
        Fetch information from table_uri and then find the appropriate entity
        The reason we're not returning entity_unique_attribute().entity
        directly is that entity_unique_attribute() returns an entity object
        that can be used for update purposes, while
        entity_unique_attribute().entity only returns the dictionary.
        :param table_uri:
        :return: A tuple of the table entity and the parsed information of
        the table qualified name
        """
        table_info = self._extract_info_from_uri(table_uri=table_uri)
        table_qn = make_table_qualified_name(table_info.get('name'),
                                             table_info.get('cluster'),
                                             table_info.get('db'))
        try:
            return self._driver.entity_unique_attribute(
                table_info['entity'], qualifiedName=table_qn), table_info
        except Exception as ex:
            LOGGER.exception(f'Table not found. {str(ex)}')
            raise NotFoundException('Table URI( {table_uri} ) does not exist'
                                    .format(table_uri=table_uri))

    def _get_user_entity(self, user_id: str) -> EntityUniqueAttribute:
        """
        Fetches a user entity from an id
        :param user_id:
        :return:
        """
        try:
            return self._driver.entity_unique_attribute("User",
                                                        qualifiedName=user_id)
        except Exception as ex:
            raise NotFoundException('(User {user_id}) does not exist'
                                    .format(user_id=user_id))

    def _create_reader(self, metadata_guid: str, user_guid: str,
                       reader_qn: str, table_uri: str) -> None:
        """
        Creates a reader entity for a specific user and table uri.
        :param metadata_guid: Table's metadata guid
        :param user_guid: User's guid
        :param reader_qn: Reader qualifiedName
        :return:
        """
        reader_entity = {
            'typeName': self.READER_TYPE,
            'attributes': {
                'qualifiedName': reader_qn,
                'isFollowing': True,
                'count': 0,
                'entityMetadata': {'guid': metadata_guid},
                'user': {'guid': user_guid},
                'entityUri': table_uri
            }
        }
        self._driver.entity_bulk.create(data={'entities': [reader_entity]})

    def _get_reader_entity(self, table_uri: str,
                           user_id: str) -> EntityUniqueAttribute:
        """
        Fetch a Reader entity from parsing table uri and user id.
        If Reader is not present, create one for the user.
        :param table_uri:
        :param user_id: Qualified Name of a user
        :return:
        """
        table_info = self._extract_info_from_uri(table_uri=table_uri)
        reader_qn = '{}.{}.metadata.{}.reader@{}'.format(
            table_info.get('db'),
            table_info.get('name'),
            user_id,
            table_info.get('cluster'))
        try:
            reader_entity = self._driver.entity_unique_attribute(
                self.READER_TYPE, qualifiedName=reader_qn)
            if not reader_entity.entity:
                # Fetch the table entity from the uri for obtaining metadata guid.
                table_entity, table_info = self._get_table_entity(
                    table_uri=table_uri)
                # Fetch user entity from user_id for relation
                user_entity = self._get_user_entity(user_id)
                # Create reader entity with the metadata and user relation.
                self._create_reader(
                    table_entity.entity[self.ATTRS_KEY][self.METADATA_KEY][self.GUID_KEY],
                    user_entity.entity[self.GUID_KEY], reader_qn, table_uri)
                # Fetch reader entity after creating it.
                reader_entity = self._driver.entity_unique_attribute(
                    self.READER_TYPE, qualifiedName=reader_qn)
            return reader_entity
        except Exception as ex:
            LOGGER.exception(f'Reader not found. {str(ex)}')
            raise NotFoundException('Reader( {reader_qn} ) does not exist'
                                    .format(reader_qn=reader_qn))

    def _get_column(self, *, table_uri: str, column_name: str) -> Dict:
        """
        Fetch the column information from referredEntities of the table entity
        :param table_uri:
        :param column_name:
        :return: A dictionary containing the column details
        """
        try:
            table_entity, _ = self._get_table_entity(table_uri=table_uri)
            columns = table_entity.entity[self.REL_ATTRS_KEY].get('columns')
            for column in columns or list():
                col_details = table_entity.referredEntities[column['guid']]
                if column_name == col_details[self.ATTRS_KEY]['name']:
                    return col_details

            raise NotFoundException(f'Column not found: {column_name}')

        except KeyError as ex:
            LOGGER.exception(f'Column not found: {str(ex)}')
            raise NotFoundException(f'Column not found: {column_name}')

    def _serialize_columns(self, *, entity: EntityUniqueAttribute) -> \
            Union[List[Column], List]:
        """
        Helper function to fetch the columns from entity and serialize them
        using the Column and Statistics models.
        :param entity: EntityUniqueAttribute object, along with
        relationshipAttributes
        :return: A list of Column objects, if there are any columns available,
        else an empty list.
        """
        columns = list()
        for column in entity.entity[self.REL_ATTRS_KEY].get('columns') or list():
            col_entity = entity.referredEntities[column['guid']]
            col_attrs = col_entity[self.ATTRS_KEY]
            col_rel_attrs = col_entity[self.REL_ATTRS_KEY]
            col_metadata = col_rel_attrs.get('metadata')
            statistics = list()

            if col_metadata:
                col_metadata = entity.referredEntities.get(
                    col_metadata.get('guid'))

                for stats in col_metadata['attributes'].get('statistics') or list():
                    stats_attrs = stats['attributes']
                    statistics.append(
                        Statistics(
                            stat_type=stats_attrs.get('stat_name'),
                            stat_val=stats_attrs.get('stat_val'),
                            start_epoch=stats_attrs.get('start_epoch'),
                            end_epoch=stats_attrs.get('end_epoch'),
                        )
                    )

            columns.append(
                Column(
                    name=col_attrs.get('name'),
                    description=col_attrs.get('description') or col_attrs.get('comment'),
                    col_type=col_attrs.get('type') or col_attrs.get('dataType'),
                    sort_order=col_attrs.get('position'),
                    stats=statistics,
                )
            )
        return sorted(columns, key=lambda item: item.sort_order)

    def get_user_detail(self, *, user_id: str) -> Union[UserEntity, None]:
        pass

    def get_table(self, *, table_uri: str) -> Table:
        """
        Gathers all the information needed for the Table Detail Page.
        :param table_uri:
        :return: A Table object with all the information available
        or gathered from different entities.
        """
        entity, table_info = self._get_table_entity(table_uri=table_uri)
        table_details = entity.entity

        try:
            attrs = table_details[self.ATTRS_KEY]

            table_qn = parse_table_qualified_name(
                qualified_name=attrs.get(self.QN_KEY))

            tags = []
            # Using or in case, if the key 'classifications' is there with a None
            for classification in table_details.get("classifications") or list():
                tags.append(
                    Tag(
                        tag_name=classification.get('typeName'),
                        tag_type="default"
                    )
                )

            columns = self._serialize_columns(entity=entity)

            table = Table(
                database=table_details.get('typeName'),
                cluster=table_qn.get('cluster_name', ''),
                schema=table_qn.get('db_name', ''),
                name=attrs.get('name') or table_qn.get("table_name", ''),
                tags=tags,
                description=attrs.get('description') or attrs.get('comment'),
                owners=[User(email=attrs.get('owner'))],
                columns=columns,
                last_updated_timestamp=table_details.get('updateTime'))

            return table
        except KeyError as ex:
            LOGGER.exception(
                'Error while accessing table information. {}'.format(str(ex)))
            raise BadRequest('Some of the required attributes '
                             'are missing in : ( {table_uri} )'
                             .format(table_uri=table_uri))

    def delete_owner(self, *, table_uri: str, owner: str) -> None:
        pass

    def add_owner(self, *, table_uri: str, owner: str) -> None:
        """
        It simply replaces the owner field in atlas with the new string.
        FixMe (Verdan): Implement multiple data owners and
        atlas changes in the documentation if needed to make owner field a list
        :param table_uri:
        :param owner: Email address of the owner
        :return: None, as it simply adds the owner.
        """
        entity, _ = self._get_table_entity(table_uri=table_uri)
        entity.entity[self.ATTRS_KEY]['owner'] = owner
        entity.update()

    def get_table_description(self, *, table_uri: str) -> Union[str, None]:
        """
        :param table_uri:
        :return: The description of the table as a string
        """
        entity, _ = self._get_table_entity(table_uri=table_uri)
        return entity.entity[self.ATTRS_KEY].get('description')

    def put_table_description(self, *, table_uri: str,
                              description: str) -> None:
        """
        Update the description of the given table.
        :param table_uri:
        :param description: Description string
        :return: None
        """
        entity, _ = self._get_table_entity(table_uri=table_uri)
        entity.entity[self.ATTRS_KEY]['description'] = description
        entity.update()

    def add_tag(self, *, table_uri: str, tag: str, tag_type: str) -> None:
        """
        Assign the tag/classification to the given table
        API Ref: /resource_EntityREST.html#resource_EntityREST_addClassification_POST
        :param table_uri:
        :param tag: Tag/Classification Name
        :param tag_type:
        :return: None
        """
        entity, _ = self._get_table_entity(table_uri=table_uri)
        entity_bulk_tag = {"classification": {"typeName": tag},
                           "entityGuids": [entity.entity['guid']]}
        self._driver.entity_bulk_classification.create(data=entity_bulk_tag)

    def delete_tag(self, *, table_uri: str, tag: str, tag_type: str) -> None:
        """
        Delete the assigned classification/tag from the given table
        API Ref: /resource_EntityREST.html#resource_EntityREST_deleteClassification_DELETE
        :param table_uri:
        :param tag:
        :return:
        """
        try:
            entity, _ = self._get_table_entity(table_uri=table_uri)
            guid_entity = self._driver.entity_guid(entity.entity['guid'])
            guid_entity.classifications(tag).delete()
        except Exception as ex:
            # FixMe (Verdan): Too broad exception. Please make it specific
            LOGGER.exception('For some reason this deletes the classification '
                             'but also always return exception. {}'.format(str(ex)))

    def put_column_description(self, *, table_uri: str, column_name: str,
                               description: str) -> None:
        """
        :param table_uri:
        :param column_name: Name of the column to update the description
        :param description: The description string
        :return: None, as it simply updates the description of a column
        """
        column_detail = self._get_column(
            table_uri=table_uri, column_name=column_name)
        col_guid = column_detail['guid']

        entity = self._driver.entity_guid(col_guid)
        entity.entity[self.ATTRS_KEY]['description'] = description
        entity.update(attribute='description')

    def get_column_description(self, *, table_uri: str,
                               column_name: str) -> Union[str, None]:
        """
        :param table_uri:
        :param column_name:
        :return: The column description using the referredEntities
        information of a table entity
        """
        column_detail = self._get_column(
            table_uri=table_uri, column_name=column_name)
        return column_detail[self.ATTRS_KEY].get('description')

    @_CACHE.region('atlas_proxy', '_get_metadata_entities')
    def _get_metadata_entities(self, popular_query_params: dict) -> List:
        try:
            popular_tables_guids = list()

            # Fetch the metadata entities based on popularity score
            search_results = self._driver.search_basic.create(
                data=popular_query_params)
            for metadata in search_results.entities:
                table_guid = metadata.attributes.get("table").get("guid")
                popular_tables_guids.append(table_guid)

            # In order to get comments and other extra fields from table entity
            table_collection = self._driver.entity_bulk(
                guid=popular_tables_guids, ignoreRelationships=True)

            table_entities: List = list()
            for _collection in table_collection:
                table_entities.extend(_collection.entities)

            return table_entities

        except (KeyError, TypeError) as ex:
            LOGGER.exception(f'_get_metadata_entities Failed : {ex}')
            raise NotFoundException('Unable to fetch popular tables. '
                                    'Please check your configurations.')

    def get_popular_tables(self, *, num_entries: int) -> List[PopularTable]:
        """
        :param num_entries: Number of popular tables to fetch
        :return: A list of PopularTable instances
        """
        popular_tables = list()
        popular_query_params = {'typeName': 'table_metadata',
                                'sortBy': 'popularityScore',
                                'sortOrder': 'DESCENDING',
                                'excludeDeletedEntities': True,
                                'limit': num_entries,
                                'attributes': ['table']}

        table_entities = self._get_metadata_entities(popular_query_params)

        for table in table_entities:
            table_attrs = table.attributes

            table_qn = parse_table_qualified_name(
                qualified_name=table_attrs.get(self.QN_KEY))

            table_name = table_qn.get("table_name") or table_attrs.get('name')
            db_name = table_qn.get("db_name", '')
            db_cluster = table_qn.get("cluster_name", '')

            popular_table = PopularTable(
                database=table.typeName,
                cluster=db_cluster,
                schema=db_name,
                name=table_name,
                description=table_attrs.get('description')
                or table_attrs.get('comment'))
            popular_tables.append(popular_table)
        return popular_tables

    def get_latest_updated_ts(self) -> int:
        pass

    def get_tags(self) -> List:
        """
        Fetch all the classification entity definitions from Atlas, as these
        are used to generate the autocomplete on the table detail page.
        :return: A list of TagDetail objects
        """
        tags = []
        for metrics in self._driver.admin_metrics:
            tag_stats = metrics.tag
            for tag, count in tag_stats["tagEntities"].items():
                tags.append(
                    TagDetail(
                        tag_name=tag,
                        tag_count=count
                    )
                )
        return tags

    def get_table_by_user_relation(self, *, user_email: str,
                                   relation_type: UserResourceRel) -> Dict[str, Any]:
        params = {
            'typeName': self.READER_TYPE,
            'offset': '0',
            'limit': '1000',
            'entityFilters': {
                'condition': 'AND',
                'criterion': [
                    {
                        'attributeName': self.QN_KEY,
                        'operator': 'contains',
                        'attributeValue': user_email
                    },
                    {
                        'attributeName': self.BKMARKS_KEY,
                        'operator': 'eq',
                        'attributeValue': 'true'
                    }
                ]
            },
            'attributes': ['count', self.QN_KEY, self.ENTITY_URI_KEY]
        }
        # Fetches the reader entities based on filters
        search_results = self._driver.search_basic.create(data=params)

        results = []
        for record in search_results.entities:
            table_info = self._extract_info_from_uri(
                table_uri=record.attributes[self.ENTITY_URI_KEY])
            res = self._parse_reader_qn(record.attributes[self.QN_KEY])
            results.append(PopularTable(
                database=table_info['entity'],
                cluster=res['cluster'],
                schema=res['db'],
                name=res['table']))

        return {'table': results}

    def get_frequently_used_tables(self, *, user_email: str) -> Dict[str, Any]:
        pass

    def add_table_relation_by_user(self, *, table_uri: str, user_email: str,
                                   relation_type: UserResourceRel) -> None:
        entity = self._get_reader_entity(table_uri=table_uri,
                                         user_id=user_email)
        entity.entity[self.ATTRS_KEY][self.BKMARKS_KEY] = True
        entity.update()

    def delete_table_relation_by_user(self, *, table_uri: str, user_email: str,
                                      relation_type: UserResourceRel) -> None:
        entity = self._get_reader_entity(table_uri=table_uri,
                                         user_id=user_email)
        entity.entity[self.ATTRS_KEY][self.BKMARKS_KEY] = False
        entity.update()
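# Standalone sketch of the Atlas basic-search payload shape used above; the
# field names follow the Atlas V2 search API as exercised by the proxy, and
# the helper and its values are illustrative, not part of the original code.
def build_reader_search(user_email: str, limit: int = 1000) -> dict:
    return {
        'typeName': 'Reader',
        'offset': '0',
        'limit': str(limit),
        'entityFilters': {
            'condition': 'AND',
            'criterion': [
                {'attributeName': 'qualifiedName',
                 'operator': 'contains',
                 'attributeValue': user_email},
                {'attributeName': 'isFollowing',
                 'operator': 'eq',
                 'attributeValue': 'true'},
            ],
        },
        'attributes': ['count', 'qualifiedName', 'entityUri'],
    }


assert build_reader_search('jdoe@example.org')['entityFilters']['condition'] == 'AND'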
def __init__(self, config_file='augur.config.json', no_config_file=0,
             description='Augur application', db_str='sqlite:///:memory:'):
    """
    Reads config, creates DB session, and initializes cache
    """
    # Command line arguments
    # TODO: make this useful
    self.arg_parser = argparse.ArgumentParser(description=description)
    self.arg_parser.parse_known_args()

    # Open the config file
    self.__already_exported = {}
    self.__default_config = {'Plugins': []}
    self.__using_config_file = True
    self.__config_bad = False
    self.__config_file_path = os.path.abspath(
        os.getenv('AUGUR_CONFIG_FILE', config_file))
    self.__config_location = os.path.dirname(self.__config_file_path)
    self.__runtime_location = 'runtime/'
    self.__export_env = os.getenv('AUGUR_ENV_EXPORT', '0') == '1'
    if os.getenv('AUGUR_ENV_ONLY', '0') != '1' and no_config_file == 0:
        try:
            self.__config_file = open(self.__config_file_path, 'r+')
        except Exception:
            logger.info(
                'Couldn\'t open {}, attempting to create. If you have an '
                'augur.cfg, you can convert it to a json file using '
                '"make to-json"'.format(config_file))
            if not os.path.exists(self.__config_location):
                os.makedirs(self.__config_location)
            self.__config_file = open(self.__config_file_path, 'w+')
            self.__config_bad = True

        # Option to export the loaded configuration as environment
        # variables for Docker
        if self.__export_env:
            export_filename = os.getenv('AUGUR_ENV_EXPORT_FILE',
                                        'augur.cfg.sh')
            self.__export_file = open(export_filename, 'w+')
            logger.info(
                'Exporting {} to environment variable export statements '
                'in {}'.format(config_file, export_filename))
            self.__export_file.write('#!/bin/bash\n')

        # Load the config file
        try:
            config_text = self.__config_file.read()
            self.__config = json.loads(config_text)
        except json.decoder.JSONDecodeError as e:
            if not self.__config_bad:
                self.__using_config_file = False
                logger.error(
                    '%s could not be parsed, using defaults. Fix that file, '
                    'or delete it and run this again to regenerate it. '
                    'Error: %s', self.__config_file_path, str(e))
            self.__config = self.__default_config
    else:
        self.__using_config_file = False
        self.__config = self.__default_config

    # List of data sources that can do periodic updates
    self.__updatable = []
    self.__processes = []

    # Create cache
    cache_config = {
        'cache.type': 'file',
        'cache.data_dir': self.path('$(RUNTIME)/cache/'),
        'cache.lock_dir': self.path('$(RUNTIME)/cache/')
    }
    cache_config.update(
        self.read_config('Cache', 'config', None, cache_config))
    cache_config['cache.data_dir'] = self.path(cache_config['cache.data_dir'])
    cache_config['cache.lock_dir'] = self.path(cache_config['cache.lock_dir'])
    if not os.path.exists(cache_config['cache.data_dir']):
        os.makedirs(cache_config['cache.data_dir'])
    if not os.path.exists(cache_config['cache.lock_dir']):
        os.makedirs(cache_config['cache.lock_dir'])
    cache_parsed = parse_cache_config_options(cache_config)
    self.cache = CacheManager(**cache_parsed)

    # Create DB Session
    self.db = None
    self.session = None
    if db_str:
        self.db = create_engine(db_str)
        self.__Session = sessionmaker(bind=self.db)
        self.session = self.__Session()

    # Initialize all objects to None
    self.__metrics_status = None
    self._loaded_plugins = {}
    for plugin_name in Application.default_plugins:
        self[plugin_name]
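# Condensed, self-contained restatement of the cache bootstrap above (the
# function name and default path are illustrative): resolve the directory,
# make sure it exists, then build the file-backed manager.
import os

from beaker.cache import CacheManager
from beaker.util import parse_cache_config_options


def build_cache(runtime_dir: str = 'runtime') -> CacheManager:
    cache_dir = os.path.join(runtime_dir, 'cache')
    os.makedirs(cache_dir, exist_ok=True)  # data and lock share one dir here
    opts = {
        'cache.type': 'file',
        'cache.data_dir': cache_dir,
        'cache.lock_dir': cache_dir,
    }
    return CacheManager(**parse_cache_config_options(opts))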
from beaker.cache import CacheManager from beaker.util import parse_cache_config_options from oslo_log import log as logging from deckhand.conf import config from deckhand.engine import layering CONF = config.CONF LOG = logging.getLogger(__name__) _CACHE_OPTS = { 'cache.type': 'memory', 'expire': CONF.engine.cache_timeout, } _CACHE = CacheManager(**parse_cache_config_options(_CACHE_OPTS)) _DOCUMENT_RENDERING_CACHE = _CACHE.get_cache('rendered_documents_cache') def lookup_by_revision_id(revision_id, documents, **kwargs): """Look up rendered documents by ``revision_id``.""" def do_render(): """Perform document rendering for the revision.""" document_layering = layering.DocumentLayering(documents, **kwargs) return document_layering.render() if CONF.engine.enable_cache: return _DOCUMENT_RENDERING_CACHE.get(key=revision_id, createfunc=do_render) else: return do_render()
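# Hedged companion sketch: when a revision's documents change, the cached rendering can
# be evicted with Beaker's remove_value; invalidate_revision is hypothetical and assumes
# the _DOCUMENT_RENDERING_CACHE defined above.
def invalidate_revision(revision_id):
    """Evict the cached rendered documents for ``revision_id``, if present."""
    _DOCUMENT_RENDERING_CACHE.remove_value(key=revision_id)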
def __init__(self):
    """
    Reads config, creates DB session, and initializes cache
    """
    self.config_file_name = 'augur.config.json'
    self.__shell_config = None
    self.__export_file = None
    self.__env_file = None
    self.config = default_config
    self.env_config = {}
    self.root_augur_dir = os.path.dirname(
        os.path.dirname(os.path.realpath(__file__)))
    default_config_path = self.root_augur_dir + '/' + self.config_file_name
    using_config_file = False
    config_locations = [
        self.config_file_name, default_config_path,
        f"/opt/augur/{self.config_file_name}"
    ]
    if os.getenv('AUGUR_CONFIG_FILE') is not None:
        config_file_path = os.getenv('AUGUR_CONFIG_FILE')
        using_config_file = True
    else:
        for location in config_locations:
            try:
                f = open(location, "r+")
                config_file_path = os.path.abspath(location)
                using_config_file = True
                f.close()
                break
            except FileNotFoundError:
                pass
    if using_config_file:
        try:
            with open(config_file_path, 'r+') as config_file_handle:
                self.config = json.loads(config_file_handle.read())
        except json.decoder.JSONDecodeError as e:
            logger.warning(
                '%s could not be parsed, using defaults. Fix that file, or delete it '
                'and run this again to regenerate it. Error: %s',
                config_file_path, str(e))
    else:
        logger.warning('No config file found in %s, using defaults.', config_locations)
    self.load_env_configuration()
    logger.setLevel(self.read_config("Development", "log_level"))
    self.cache_config = {
        'cache.type': 'file',
        'cache.data_dir': 'runtime/cache/',
        'cache.lock_dir': 'runtime/cache/'
    }
    if not os.path.exists(self.cache_config['cache.data_dir']):
        os.makedirs(self.cache_config['cache.data_dir'])
    if not os.path.exists(self.cache_config['cache.lock_dir']):
        os.makedirs(self.cache_config['cache.lock_dir'])
    cache_parsed = parse_cache_config_options(self.cache_config)
    self.cache = CacheManager(**cache_parsed)
    self.database = self.__connect_to_database()
    self.spdx_db = self.__connect_to_database(include_spdx=True)
    self.metrics = Metrics(self)
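# Sketch only: read_config() and load_env_configuration() are referenced above but not
# shown in this excerpt. A minimal lookup with an environment override could look like
# this; the AUGUR_<SECTION>_<NAME> naming is an assumption, not the project's contract.
import os

def read_config(config, section, name, default=None):
    env_key = 'AUGUR_{}_{}'.format(section, name).upper()
    if env_key in os.environ:
        return os.environ[env_key]  # environment variables win over the JSON file
    return config.get(section, {}).get(name, default)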
class AtlasProxy(BaseProxy):
    """
    Atlas Proxy client for the amundsen metadata
    {ATLAS_API_DOCS} = https://atlas.apache.org/api/v2/
    """
    TABLE_ENTITY = 'Table'
    DB_ATTRIBUTE = 'db'
    STATISTICS_FORMAT_SPEC = app.config['STATISTICS_FORMAT_SPEC']
    TABLE_TYPE = 'Table'
    BOOKMARK_TYPE = 'Bookmark'
    USER_TYPE = 'User'
    READER_TYPE = 'Reader'
    QN_KEY = 'qualifiedName'
    BOOKMARK_ACTIVE_KEY = 'active'
    GUID_KEY = 'guid'
    ATTRS_KEY = 'attributes'
    REL_ATTRS_KEY = 'relationshipAttributes'
    ENTITY_URI_KEY = 'entityUri'
    # Qualified Name of the Glossary, that holds the user defined terms.
    # For Amundsen, we are using Glossary Terms as the Tags.
    AMUNDSEN_USER_TAGS = 'amundsen_user_tags'
    _CACHE = CacheManager(**parse_cache_config_options({'cache.regions': 'atlas_proxy',
                                                        'cache.atlas_proxy.type': 'memory',
                                                        'cache.atlas_proxy.expire': _ATLAS_PROXY_CACHE_EXPIRY_SEC}))

    def __init__(self, *,
                 host: str,
                 port: int,
                 user: str = 'admin',
                 password: str = '',
                 encrypted: bool = False,
                 validate_ssl: bool = False,
                 client_kwargs: dict = dict()) -> None:
        """
        Initiate the Apache Atlas client with the provided credentials
        """
        protocol = 'https' if encrypted else 'http'
        self.client = AtlasClient(f'{protocol}://{host}:{port}', (user, password))
        self.client.session.verify = validate_ssl

    def _extract_info_from_uri(self, *, table_uri: str) -> Dict:
        """
        Extracts the table information from table_uri coming from frontend.
        :param table_uri:
        :return: Dictionary object, containing following information:
        entity: Type of entity example: rdbms_table, hive_table etc.
        cluster: Cluster information
        db: Database Name
        name: Table Name
        """
        pattern = re.compile(r"""
            ^   (?P<entity>.*?)
            :\/\/
                (?P<cluster>.*)
            \.
                (?P<db>.*?)
            \/
                (?P<name>.*?)
            $
        """, re.X)
        result = pattern.match(table_uri)
        return result.groupdict() if result else dict()

    def _parse_bookmark_qn(self, bookmark_qn: str) -> Dict:
        """
        Parse bookmark qualifiedName and extract the info
        :param bookmark_qn: Qualified Name of Bookmark entity
        :return: Dictionary object containing following information:
        cluster: cluster information
        db: Database name
        name: Table name
        """
        pattern = re.compile(r"""
            ^(?P<db>[^.]*)
            \.
            (?P<table>[^.]*)
            \.
            (?P<entity_type>[^.]*)
            \.
            (?P<user_id>[^.]*)\.bookmark
            \@
            (?P<cluster>.*)
            $
        """, re.X)
        result = pattern.match(bookmark_qn)
        return result.groupdict() if result else dict()

    def _get_user_details(self, user_id: str) -> Dict:
        """
        Helper function to help get the user details if the `USER_DETAIL_METHOD` is configured,
        else uses the user_id for both email and user_id properties.
        :param user_id: The Unique user id of a user entity
        :return: a dictionary of user details
        """
        if app.config.get('USER_DETAIL_METHOD'):
            user_details = app.config.get('USER_DETAIL_METHOD')(user_id)  # type: ignore
        else:
            user_details = {'email': user_id, 'user_id': user_id}
        return user_details

    def _get_table_entity(self, *, table_uri: str) -> AtlasEntityWithExtInfo:
        """
        Fetch information from table_uri and then find the appropriate entity
        :param table_uri: The table URI coming from Amundsen Frontend
        :return: A table entity matching the Qualified Name derived from table_uri
        """
        table_info = self._extract_info_from_uri(table_uri=table_uri)
        table_qn = make_table_qualified_name(table_info.get('name', ''),
                                             table_info.get('cluster'),
                                             table_info.get('db')
                                             )
        try:
            return self.client.entity.get_entity_by_attribute(type_name=table_info['entity'],
                                                              uniq_attributes=[(self.QN_KEY, table_qn)])
        except Exception as ex:
            LOGGER.exception(f'Table not found. {str(ex)}')
            raise NotFoundException('Table URI( {table_uri} ) does not exist'
                                    .format(table_uri=table_uri))

    def _get_user_entity(self, user_id: str) -> AtlasEntityWithExtInfo:
        """
        Fetches a user entity from an id
        :param user_id: User ID
        :return: A User entity matching the user_id
        """
        try:
            return self.client.entity.get_entity_by_attribute(type_name=self.USER_TYPE,
                                                              uniq_attributes=[(self.QN_KEY, user_id)])
        except Exception as ex:
            raise NotFoundException('(User {user_id}) does not exist'
                                    .format(user_id=user_id))

    def _create_bookmark(self, entity: AtlasEntityWithExtInfo, user_guid: str, bookmark_qn: str,
                         table_uri: str) -> None:
        """
        Creates a bookmark entity for a specific user and table uri.
        :param user_guid: User's guid
        :param bookmark_qn: Bookmark qualifiedName
        :return:
        """
        bookmark_entity = {
            'entity': {
                'typeName': self.BOOKMARK_TYPE,
                'attributes': {'qualifiedName': bookmark_qn,
                               self.BOOKMARK_ACTIVE_KEY: True,
                               'entityUri': table_uri,
                               'user': {'guid': user_guid},
                               'entity': {'guid': entity.entity[self.GUID_KEY]}}
            }
        }
        bookmark_entity = type_coerce(bookmark_entity, AtlasEntityWithExtInfo)
        self.client.entity.create_entity(bookmark_entity)

    def _get_bookmark_entity(self, entity_uri: str, user_id: str) -> AtlasEntityWithExtInfo:
        """
        Fetch a Bookmark entity from parsing table uri and user id.
        If Bookmark is not present, create one for the user.
        :param entity_uri:
        :param user_id: Qualified Name of a user
        :return:
        """
        table_info = self._extract_info_from_uri(table_uri=entity_uri)
        bookmark_qn = '{}.{}.{}.{}.bookmark@{}'.format(table_info.get('db'),
                                                       table_info.get('name'),
                                                       table_info.get('entity'),
                                                       user_id,
                                                       table_info.get('cluster'))
        try:
            bookmark_entity = self.client.entity.get_entity_by_attribute(type_name=self.BOOKMARK_TYPE,
                                                                         uniq_attributes=[(self.QN_KEY, bookmark_qn)])
        except Exception as ex:
            LOGGER.exception(f'Bookmark not found. {str(ex)}')
            table_entity = self._get_table_entity(table_uri=entity_uri)
            # Fetch user entity from user_id for relation
            user_entity = self._get_user_entity(user_id)
            # Create bookmark entity with the user relation.
            self._create_bookmark(table_entity, user_entity.entity[self.GUID_KEY], bookmark_qn, entity_uri)
            # Fetch bookmark entity after creating it.
            bookmark_entity = self.client.entity.get_entity_by_attribute(type_name=self.BOOKMARK_TYPE,
                                                                         uniq_attributes=[(self.QN_KEY, bookmark_qn)])
        return bookmark_entity

    def _get_column(self, *, table_uri: str, column_name: str) -> Dict:
        """
        Fetch the column information from referredEntities of the table entity
        :param table_uri:
        :param column_name:
        :return: A dictionary containing the column details
        """
        try:
            table_entity = self._get_table_entity(table_uri=table_uri)
            columns = table_entity.entity[self.REL_ATTRS_KEY].get('columns')
            for column in columns or list():
                col_details = table_entity.referredEntities[column[self.GUID_KEY]]
                if column_name == col_details[self.ATTRS_KEY]['name']:
                    return col_details
            raise NotFoundException(f'Column not found: {column_name}')
        except KeyError as ex:
            LOGGER.exception(f'Column not found: {str(ex)}')
            raise NotFoundException(f'Column not found: {column_name}')

    def _serialize_columns(self, *, entity: AtlasEntityWithExtInfo) -> \
            Union[List[Column], List]:
        """
        Helper function to fetch the columns from entity and serialize them
        using Column and Stat model.
        :param entity: AtlasEntityWithExtInfo object,
        along with relationshipAttributes
        :return: A list of Column objects, if there are any columns available,
        else an empty list.
""" columns = list() for column in entity.entity[self.REL_ATTRS_KEY].get('columns') or list(): column_status = column.get('entityStatus', 'inactive').lower() if column_status != 'active': continue col_entity = entity.referredEntities[column[self.GUID_KEY]] col_attrs = col_entity[self.ATTRS_KEY] statistics = list() badges = list() for column_classification in col_entity.get('classifications') or list(): if column_classification.get('entityStatus') == Status.ACTIVE: name = column_classification.get('typeName') badges.append(Badge(badge_name=name, category='default')) for stats in col_attrs.get('statistics') or list(): stats_attrs = stats['attributes'] stat_type = stats_attrs.get('stat_name') stat_format = self.STATISTICS_FORMAT_SPEC.get(stat_type, dict()) if not stat_format.get('drop', False): stat_type = stat_format.get('new_name', stat_type) stat_val = stats_attrs.get('stat_val') format_val = stat_format.get('format') if format_val: stat_val = format_val.format(stat_val) else: stat_val = str(stat_val) start_epoch = stats_attrs.get('start_epoch') end_epoch = stats_attrs.get('end_epoch') statistics.append( Stat( stat_type=stat_type, stat_val=stat_val, start_epoch=start_epoch, end_epoch=end_epoch, ) ) columns.append( Column( name=col_attrs.get('name'), description=col_attrs.get('description') or col_attrs.get('comment'), col_type=col_attrs.get('type') or col_attrs.get('dataType') or col_attrs.get('data_type'), sort_order=col_attrs.get('position') or 9999, stats=statistics, badges=badges ) ) return sorted(columns, key=lambda item: item.sort_order) def _get_reports(self, guids: List[str]) -> List[ResourceReport]: reports = [] if guids: report_entities = self.client.entity.get_entities_by_guids(guids=guids) for report_entity in report_entities.entities: try: if report_entity.status == Status.ACTIVE: report_attrs = report_entity.attributes reports.append( ResourceReport( name=report_attrs['name'], url=report_attrs['url'] ) ) except (KeyError, AttributeError) as ex: LOGGER.exception('Error while accessing table report: {}. {}' .format(str(report_entity), str(ex))) parsed_reports = app.config['RESOURCE_REPORT_CLIENT'](reports) \ if app.config['RESOURCE_REPORT_CLIENT'] else reports return parsed_reports def _get_owners(self, data_owners: list, fallback_owner: str = None) -> List[User]: owners_detail = list() active_owners_list = list() active_owners = filter(lambda item: item['entityStatus'] == Status.ACTIVE and item['relationshipStatus'] == Status.ACTIVE, data_owners) for owner in active_owners: owner_qn = owner['displayText'] owner_data = self._get_user_details(owner_qn) owners_detail.append(User(**owner_data)) active_owners_list.append(owner_qn) # To avoid the duplication, # we are checking if the fallback is not in data_owners if fallback_owner and (fallback_owner not in active_owners_list): owners_detail.append(User(**self._get_user_details(fallback_owner))) return owners_detail def get_user(self, *, id: str) -> Union[UserEntity, None]: pass def get_users(self) -> List[UserEntity]: pass def get_table(self, *, table_uri: str) -> Table: """ Gathers all the information needed for the Table Detail Page. :param table_uri: :return: A Table object with all the information available or gathered from different entities. 
""" entity = self._get_table_entity(table_uri=table_uri) table_details = entity.entity try: attrs = table_details[self.ATTRS_KEY] programmatic_descriptions = self._get_programmatic_descriptions(attrs.get('parameters', dict())) table_qn = parse_table_qualified_name( qualified_name=attrs.get(self.QN_KEY) ) badges = [] # Using or in case, if the key 'classifications' is there with a None for classification in table_details.get('classifications') or list(): badges.append( Badge( badge_name=classification.get('typeName'), category="default" ) ) tags = [] for term in table_details.get(self.REL_ATTRS_KEY).get("meanings") or list(): if term.get('entityStatus') == Status.ACTIVE and \ term.get('relationshipStatus') == Status.ACTIVE: tags.append( Tag( tag_name=term.get("displayText"), tag_type="default" ) ) columns = self._serialize_columns(entity=entity) reports_guids = [report.get("guid") for report in attrs.get("reports") or list()] table_type = attrs.get('tableType') or 'table' is_view = 'view' in table_type.lower() readers = self._get_readers(table_details) table = Table( database=table_details.get('typeName'), cluster=table_qn.get('cluster_name', ''), schema=table_qn.get('db_name', ''), name=attrs.get('name') or table_qn.get("table_name", ''), badges=badges, tags=tags, description=attrs.get('description') or attrs.get('comment'), owners=self._get_owners( table_details[self.REL_ATTRS_KEY].get('ownedBy', []), attrs.get('owner')), resource_reports=self._get_reports(guids=reports_guids), columns=columns, is_view=is_view, table_readers=readers, last_updated_timestamp=self._parse_date(table_details.get('updateTime')), programmatic_descriptions=programmatic_descriptions, watermarks=self._get_table_watermarks(table_details)) return table except KeyError as ex: LOGGER.exception('Error while accessing table information. {}' .format(str(ex))) raise BadRequest('Some of the required attributes ' 'are missing in : ( {table_uri} )' .format(table_uri=table_uri)) @staticmethod def _validate_date(text_date: str, date_format: str) -> Tuple[Optional[datetime.datetime], Optional[str]]: try: return datetime.datetime.strptime(text_date, date_format), date_format except (ValueError, TypeError): return None, None @staticmethod def _select_watermark_format(partition_names: List[str]) -> Optional[str]: result = None for partition_name in partition_names: # Assume that all partitions for given table have the same date format. Only thing that needs to be done # is establishing which format out of the supported ones it is and then we validate every partition # against it. 
            for df in app.config['WATERMARK_DATE_FORMATS']:
                _, result = AtlasProxy._validate_date(partition_name, df)

                if result:
                    LOGGER.debug('Established date format', extra=dict(date_format=result))
                    return result

        return result

    @staticmethod
    def _render_partition_key_name(entity: AtlasEntityWithExtInfo) -> Optional[str]:
        _partition_keys = []

        for partition_key in entity.get('attributes', dict()).get('partitionKeys', []):
            partition_key_column_name = partition_key.get('displayName')

            if partition_key_column_name:
                _partition_keys.append(partition_key_column_name)

        partition_key = ' '.join(_partition_keys).strip()

        return partition_key

    def _get_table_watermarks(self, entity: AtlasEntityWithExtInfo) -> List[Watermark]:
        partition_value_format = '%Y-%m-%d %H:%M:%S'

        _partitions = entity.get('relationshipAttributes', dict()).get('partitions', list())

        names = [_partition.get('displayText') for _partition in _partitions
                 if _partition.get('entityStatus') == Status.ACTIVE
                 and _partition.get('relationshipStatus') == Status.ACTIVE]

        if not names:
            return []

        partition_key = self._render_partition_key_name(entity)
        watermark_date_format = self._select_watermark_format(names)

        partitions = {}

        for _partition in _partitions:
            partition_name = _partition.get('displayText')

            if partition_name and watermark_date_format:
                partition_date, _ = self._validate_date(partition_name, watermark_date_format)

                if partition_date:
                    common_values = {'partition_value': datetime.datetime.strftime(partition_date,
                                                                                   partition_value_format),
                                     'create_time': 0,
                                     'partition_key': partition_key}

                    partitions[partition_date] = common_values

        if partitions:
            low_watermark_date = min(partitions.keys())
            high_watermark_date = max(partitions.keys())

            low_watermark = Watermark(watermark_type='low_watermark', **partitions.get(low_watermark_date))
            high_watermark = Watermark(watermark_type='high_watermark', **partitions.get(high_watermark_date))

            return [low_watermark, high_watermark]
        else:
            return []

    def delete_owner(self, *, table_uri: str, owner: str) -> None:
        """
        :param table_uri:
        :param owner:
        :return:
        """
        table = self._get_table_entity(table_uri=table_uri)
        table_entity = table.entity

        if table_entity[self.REL_ATTRS_KEY].get("ownedBy"):
            try:
                # Materialize the matches up front; a lazy filter() would be exhausted
                # by the truthiness check before an item could be read from it.
                active_owners = [item for item in table_entity[self.REL_ATTRS_KEY]['ownedBy']
                                 if item['relationshipStatus'] == Status.ACTIVE
                                 and item['displayText'] == owner]
                if active_owners:
                    self.client.relationship.delete_relationship_by_guid(
                        guid=active_owners[0].get('relationshipGuid')
                    )
                else:
                    raise BadRequest('You cannot delete this owner.')
            except Exception as ex:
                LOGGER.exception('Error while removing table data owner. {}'
                                 .format(str(ex)))

    def add_owner(self, *, table_uri: str, owner: str) -> None:
        """
        Query the Atlas User entity to find whether an entity exists for the
        owner string in parameter; if not, create one. Then use that User
        entity's GUID and add a relationship between Table and User, on the ownedBy field.
        :param table_uri:
        :param owner: Email address of the owner
        :return: None, as it simply adds the owner.
""" owner_info = self._get_user_details(owner) if not owner_info: raise NotFoundException(f'User "{owner}" does not exist.') user_dict = type_coerce({ "entity": { "typeName": "User", "attributes": {"qualifiedName": owner}, } }, AtlasEntityWithExtInfo) # Get or Create a User user_entity = self.client.entity.create_entity(user_dict) user_guid = next(iter(user_entity.guidAssignments.values())) table = self._get_table_entity(table_uri=table_uri) entity_def = { "typeName": "DataSet_Users_Owner", "end1": { "guid": table.entity.get("guid"), "typeName": "Table", }, "end2": { "guid": user_guid, "typeName": "User", }, } try: relationship = type_coerce(entity_def, AtlasRelationship) self.client.relationship.create_relationship(relationship=relationship) except Exception as ex: LOGGER.exception('Error while adding the owner information. {}' .format(str(ex))) raise BadRequest(f'User {owner} is already added as a data owner for ' f'table {table_uri}.') def get_table_description(self, *, table_uri: str) -> Union[str, None]: """ :param table_uri: :return: The description of the table as a string """ entity = self._get_table_entity(table_uri=table_uri) return entity.entity[self.ATTRS_KEY].get('description') def put_table_description(self, *, table_uri: str, description: str) -> None: """ Update the description of the given table. :param table_uri: :param description: Description string :return: None """ table = self._get_table_entity(table_uri=table_uri) self.client.entity.partial_update_entity_by_guid( entity_guid=table.entity.get("guid"), attr_value=description, attr_name='description' ) @_CACHE.cache('_get_user_defined_glossary_guid') def _get_user_defined_glossary_guid(self) -> str: """ This function look for a user defined glossary i.e., self.ATLAS_USER_DEFINED_TERMS If there is not one available, this will create a new glossary. The meain reason to put this functionality into a separate function is to avoid the lookup each time someone assigns a tag to a data source. :return: Glossary object, that holds the user defined terms. """ # Check if the user glossary already exists glossaries = self.client.glossary.get_all_glossaries() for glossary in glossaries: if glossary.get(self.QN_KEY) == self.AMUNDSEN_USER_TAGS: return glossary[self.GUID_KEY] # If not already exists, create one glossary_def = AtlasGlossary({"name": self.AMUNDSEN_USER_TAGS, "shortDescription": "Amundsen User Defined Terms"}) glossary = self.client.glossary.create_glossary(glossary_def) return glossary.guid @_CACHE.cache('_get_create_glossary_term') def _get_create_glossary_term(self, term_name: str) -> Union[AtlasGlossaryTerm, AtlasEntityHeader]: """ Since Atlas does not provide any API to find a term directly by a qualified name, we need to look for AtlasGlossaryTerm via basic search, if found then return, else create a new glossary term under the user defined glossary. :param term_name: Name of the term. NOTE: this is different from qualified name. :return: Term Object. 
""" params = { 'typeName': "AtlasGlossaryTerm", 'excludeDeletedEntities': True, 'includeSubTypes': True, 'attributes': ["assignedEntities", ], 'entityFilters': {'condition': "AND", 'criterion': [{'attributeName': "name", 'operator': "=", 'attributeValue': term_name}] } } result = self.client.discovery.faceted_search(search_parameters=params) if result.approximateCount: term = result.entities[0] else: glossary_guid = self._get_user_defined_glossary_guid() glossary_def = AtlasGlossaryHeader({'glossaryGuid': glossary_guid}) term_def = AtlasGlossaryTerm({'name': term_name, 'anchor': glossary_def}) term = self.client.glossary.create_glossary_term(term_def) return term def add_tag(self, *, id: str, tag: str, tag_type: str = "default", resource_type: ResourceType = ResourceType.Table) -> None: """ Assign the Glossary Term to the give table. If the term is not there, it will create a new term under the Glossary self.ATLAS_USER_DEFINED_TERMS :param id: Table URI / Dashboard ID etc. :param tag: Tag Name :param tag_type :return: None """ entity = self._get_table_entity(table_uri=id) term = self._get_create_glossary_term(tag) related_entity = AtlasRelatedObjectId({self.GUID_KEY: entity.entity[self.GUID_KEY], "typeName": resource_type.name}) self.client.glossary.assign_term_to_entities(term.guid, [related_entity]) def add_badge(self, *, id: str, badge_name: str, category: str = '', resource_type: ResourceType) -> None: # Not implemented raise NotImplementedError def delete_tag(self, *, id: str, tag: str, tag_type: str, resource_type: ResourceType = ResourceType.Table) -> None: """ Removes the Glossary Term assignment from the provided source. :param id: Table URI / Dashboard ID etc. :param tag: Tag Name :return:None """ entity = self._get_table_entity(table_uri=id) term = self._get_create_glossary_term(tag) if not term: return assigned_entities = self.client.glossary.get_entities_assigned_with_term(term.guid, "ASC", -1, 0) for item in assigned_entities or list(): if item.get(self.GUID_KEY) == entity.entity[self.GUID_KEY]: related_entity = AtlasRelatedObjectId(item) return self.client.glossary.disassociate_term_from_entities(term.guid, [related_entity]) def delete_badge(self, *, id: str, badge_name: str, category: str, resource_type: ResourceType) -> None: # Not implemented raise NotImplementedError def put_column_description(self, *, table_uri: str, column_name: str, description: str) -> None: """ :param table_uri: :param column_name: Name of the column to update the description :param description: The description string :return: None, as it simply updates the description of a column """ column_detail = self._get_column( table_uri=table_uri, column_name=column_name) col_guid = column_detail[self.GUID_KEY] self.client.entity.partial_update_entity_by_guid( entity_guid=col_guid, attr_value=description, attr_name='description' ) def get_column_description(self, *, table_uri: str, column_name: str) -> Union[str, None]: """ :param table_uri: :param column_name: :return: The column description using the referredEntities information of a table entity """ column_detail = self._get_column( table_uri=table_uri, column_name=column_name) return column_detail[self.ATTRS_KEY].get('description') def _serialize_popular_tables(self, entities: list) -> List[PopularTable]: """ Gets a list of entities and serialize the popular tables. 
:param entities: List of entities from atlas client :return: a list of PopularTable objects """ popular_tables = list() for table in entities: table_attrs = table.attributes table_qn = parse_table_qualified_name( qualified_name=table_attrs.get(self.QN_KEY) ) table_name = table_qn.get("table_name") or table_attrs.get('name') db_name = table_qn.get("db_name", '') db_cluster = table_qn.get("cluster_name", '') popular_table = PopularTable( database=table.typeName, cluster=db_cluster, schema=db_name, name=table_name, description=table_attrs.get('description') or table_attrs.get('comment')) popular_tables.append(popular_table) return popular_tables def get_popular_tables(self, *, num_entries: int, user_id: Optional[str] = None) -> List[PopularTable]: """ Generates a list of Popular tables to be shown on the home page of Amundsen. :param num_entries: Number of popular tables to fetch :return: A List of popular tables instances """ popular_query_params = {'typeName': 'Table', 'sortBy': 'popularityScore', 'sortOrder': 'DESCENDING', 'excludeDeletedEntities': True, 'limit': num_entries} search_results = self.client.discovery.faceted_search(search_parameters=popular_query_params) return self._serialize_popular_tables(search_results.entities) def get_latest_updated_ts(self) -> int: date = None metrics = self.client.admin.get_metrics() try: date = self._parse_date(metrics.general.get('stats', {}).get('Notification:lastMessageProcessedTime')) except AttributeError: pass return date or 0 def get_tags(self) -> List: """ Fetch all the glossary terms from atlas, along with their assigned entities as this will be used to generate the autocomplete on the table detail page :return: A list of TagDetail Objects """ tags = [] params = { 'typeName': "AtlasGlossaryTerm", 'limit': 1000, 'offset': 0, 'excludeDeletedEntities': True, 'includeSubTypes': True, 'attributes': ["assignedEntities", ] } glossary_terms = self.client.discovery.faceted_search(search_parameters=params) for item in glossary_terms.entities or list(): tags.append( TagDetail( tag_name=item.attributes.get("name"), tag_count=len(item.attributes.get("assignedEntities")) ) ) return tags def get_badges(self) -> List: badges = list() metrics = self.client.admin.get_metrics() try: system_badges = metrics["tag"].get("tagEntities").keys() for item in system_badges: badges.append( Badge(badge_name=item, category="default") ) except AttributeError: LOGGER.info("No badges/classifications available in the system.") return badges def _get_resources_followed_by_user(self, user_id: str, resource_type: str) \ -> List[Union[PopularTable, DashboardSummary]]: """ ToDo (Verdan): Dashboard still needs to be implemented. Helper function to get the resource, table, dashboard etc followed by a user. :param user_id: User ID of a user :param resource_type: Type of a resource that returns, could be table, dashboard etc. :return: A list of PopularTable, DashboardSummary or any other resource. 
""" params = { 'typeName': self.BOOKMARK_TYPE, 'offset': '0', 'limit': '1000', 'excludeDeletedEntities': True, 'entityFilters': { 'condition': 'AND', 'criterion': [ { 'attributeName': self.QN_KEY, 'operator': 'contains', 'attributeValue': f'.{user_id}.bookmark' }, { 'attributeName': self.BOOKMARK_ACTIVE_KEY, 'operator': 'eq', 'attributeValue': 'true' } ] }, 'attributes': ['count', self.QN_KEY, self.ENTITY_URI_KEY] } # Fetches the bookmark entities based on filters search_results = self.client.discovery.faceted_search(search_parameters=params) resources = [] for record in search_results.entities: table_info = self._extract_info_from_uri(table_uri=record.attributes[self.ENTITY_URI_KEY]) res = self._parse_bookmark_qn(record.attributes[self.QN_KEY]) resources.append(PopularTable( database=table_info['entity'], cluster=res['cluster'], schema=res['db'], name=res['table'])) return resources def _get_resources_owned_by_user(self, user_id: str, resource_type: str) \ -> List[Union[PopularTable, DashboardSummary, Any]]: """ ToDo (Verdan): Dashboard still needs to be implemented. Helper function to get the resource, table, dashboard etc owned by a user. :param user_id: User ID of a user :param resource_type: Type of a resource that returns, could be table, dashboard etc. :return: A list of PopularTable, DashboardSummary or any other resource. """ resources = list() if resource_type == ResourceType.Table.name: type_regex = "(.*)_table$" entity_type = 'Table' # elif resource_type == ResourceType.Dashboard.name: # type_regex = "Dashboard" # entity_type = 'Dashboard' else: LOGGER.exception(f'Resource Type ({resource_type}) is not yet implemented') raise NotImplemented user_entity = self.client.entity.get_entity_by_attribute(type_name=self.USER_TYPE, uniq_attributes=[(self.QN_KEY, user_id)]).entity if not user_entity: LOGGER.exception(f'User ({user_id}) not found in Atlas') raise NotFoundException(f'User {user_id} not found.') resource_guids = set() for item in user_entity[self.REL_ATTRS_KEY].get('owns') or list(): if (item['entityStatus'] == Status.ACTIVE and item['relationshipStatus'] == Status.ACTIVE and re.compile(type_regex).match(item['typeName'])): resource_guids.add(item[self.GUID_KEY]) owned_tables_query = f'{entity_type} where owner like "{user_id.lower()}*" and __state = "ACTIVE"' table_entities = self.client.discovery.dsl_search(owned_tables_query) for table in table_entities.entities or list(): resource_guids.add(table.guid) if resource_guids: resource_guids_chunks = AtlasProxy.split_list_to_chunks(list(resource_guids), 100) for chunk in resource_guids_chunks: entities = self.client.entity.get_entities_by_guids(guids=list(chunk), ignore_relationships=True) if resource_type == ResourceType.Table.name: resources += self._serialize_popular_tables(entities.entities) else: LOGGER.info(f'User ({user_id}) does not own any "{resource_type}"') return resources @staticmethod def split_list_to_chunks(input_list: List[Any], n: int) -> Generator: """Yield successive n-sized chunks from lst.""" for i in range(0, len(input_list), n): yield input_list[i:i + n] def get_dashboard_by_user_relation(self, *, user_email: str, relation_type: UserResourceRel) \ -> Dict[str, List[DashboardSummary]]: pass def get_table_by_user_relation(self, *, user_email: str, relation_type: UserResourceRel) -> Dict[str, Any]: tables = list() if relation_type == UserResourceRel.follow: tables = self._get_resources_followed_by_user(user_id=user_email, resource_type=ResourceType.Table.name) elif relation_type == UserResourceRel.own: 
            tables = self._get_resources_owned_by_user(user_id=user_email,
                                                       resource_type=ResourceType.Table.name)
        return {'table': tables}

    def get_frequently_used_tables(self, *, user_email: str) -> Dict[str, List[PopularTable]]:
        user = self.client.entity.get_entity_by_attribute(type_name=self.USER_TYPE,
                                                          uniq_attributes=[(self.QN_KEY, user_email)]).entity

        readers_guids = []
        for user_reads in user['relationshipAttributes'].get('entityReads') or list():
            entity_status = user_reads['entityStatus']
            relationship_status = user_reads['relationshipStatus']

            if entity_status == Status.ACTIVE and relationship_status == Status.ACTIVE:
                readers_guids.append(user_reads['guid'])

        readers = self.client.entity.get_entities_by_guids(guids=list(readers_guids), ignore_relationships=True)

        _results = {}
        for reader in readers.entities or list():
            entity_uri = reader.attributes.get(self.ENTITY_URI_KEY)
            count = reader.attributes.get('count')

            if count:
                details = self._extract_info_from_uri(table_uri=entity_uri)

                _results[count] = dict(cluster=details.get('cluster'),
                                       name=details.get('name'),
                                       schema=details.get('db'),
                                       database=details.get('entity'))

        sorted_counts = sorted(_results.keys())

        results = []
        for count in sorted_counts:
            data: dict = _results.get(count, dict())
            table = PopularTable(**data)
            results.append(table)

        return {'table': results}

    def add_resource_relation_by_user(self, *,
                                      id: str,
                                      user_id: str,
                                      relation_type: UserResourceRel,
                                      resource_type: ResourceType) -> None:
        if resource_type is not ResourceType.Table:
            raise NotImplementedError('resource type {} is not supported'.format(resource_type))

        entity = self._get_bookmark_entity(entity_uri=id, user_id=user_id)
        entity.entity[self.ATTRS_KEY][self.BOOKMARK_ACTIVE_KEY] = True
        entity.update()

    def delete_resource_relation_by_user(self, *,
                                         id: str,
                                         user_id: str,
                                         relation_type: UserResourceRel,
                                         resource_type: ResourceType) -> None:
        if resource_type is not ResourceType.Table:
            raise NotImplementedError('resource type {} is not supported'.format(resource_type))

        entity = self._get_bookmark_entity(entity_uri=id, user_id=user_id)
        entity.entity[self.ATTRS_KEY][self.BOOKMARK_ACTIVE_KEY] = False
        entity.update()

    def _parse_date(self, date: int) -> Optional[int]:
        try:
            date_str = str(date)
            date_trimmed = date_str[:10]

            assert len(date_trimmed) == 10

            return int(date_trimmed)
        except Exception:
            return None

    def _get_readers(self, entity: AtlasEntityWithExtInfo, top: Optional[int] = 15) -> List[Reader]:
        _readers = entity.get('relationshipAttributes', dict()).get('readers', list())

        guids = [_reader.get('guid') for _reader in _readers
                 if _reader.get('entityStatus', 'INACTIVE') == Status.ACTIVE
                 and _reader.get('relationshipStatus', 'INACTIVE') == Status.ACTIVE]

        if not guids:
            return []

        readers = self.client.entity.get_entities_by_guids(guids=list(guids), ignore_relationships=False)

        _result = []

        for _reader in readers.entities or list():
            read_count = _reader.attributes['count']

            if read_count >= int(app.config['POPULAR_TABLE_MINIMUM_READER_COUNT']):
                reader_qn = _reader.relationshipAttributes['user']['displayText']
                reader_details = self._get_user_details(reader_qn)
                reader = Reader(user=User(**reader_details), read_count=read_count)
                _result.append(reader)

        result = sorted(_result, key=attrgetter('read_count'), reverse=True)[:top]
        return result

    def _get_programmatic_descriptions(self, parameters: dict) -> List[ProgrammaticDescription]:
        programmatic_descriptions: Dict[str, ProgrammaticDescription] = {}

        for source, text in parameters.items():
            use_parameter = True

            for regex_filter in app.config['PROGRAMMATIC_DESCRIPTIONS_EXCLUDE_FILTERS']:
                pattern = re.compile(regex_filter)
                if pattern.match(source):
                    use_parameter = False
                    break

            if use_parameter:
                source = re.sub("([a-z])([A-Z])", r"\g<1> \g<2>", source).lower()
                programmatic_descriptions[source] = ProgrammaticDescription(source=source, text=text)

        result = dict(sorted(programmatic_descriptions.items()))
        return list(result.values())

    def get_dashboard(self, dashboard_uri: str) -> DashboardDetailEntity:
        pass

    def get_dashboard_description(self, *, id: str) -> Description:
        pass

    def put_dashboard_description(self, *, id: str, description: str) -> None:
        pass

    def get_resources_using_table(self, *, id: str, resource_type: ResourceType) -> Dict[str, List[DashboardSummary]]:
        return {}

    def get_lineage(self, *, id: str, resource_type: ResourceType, direction: str, depth: int) -> Lineage:
        pass
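# Hedged usage sketch for the AtlasProxy above: host/port are placeholders, and the
# surrounding Flask app config (STATISTICS_FORMAT_SPEC etc.) is assumed to be in place.
proxy = AtlasProxy(host='atlas.example.com', port=21000, user='admin', password='admin')
for table in proxy.get_popular_tables(num_entries=10):
    print(table.schema, table.name)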
def setup_cache(self):
    '''Set up a beaker cache manager.'''
    self.cachemgr = CacheManager(**parse_cache_config_options(cache_opts))
    # 'expire' is Beaker's keyword for a TTL in seconds ('expires' is silently ignored)
    self.image_cache = self.cachemgr.get_cache('images', expire=1800)
    self.flavor_cache = self.cachemgr.get_cache('flavors', expire=1800)
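# Minimal follow-on sketch: Beaker's Cache exposes put()/get(), and get() raises
# KeyError on a miss unless a createfunc is supplied. _fetch_image is hypothetical.
def get_image(self, image_id):
    try:
        return self.image_cache.get(key=image_id)
    except KeyError:
        image = self._fetch_image(image_id)  # backend call on a cache miss
        self.image_cache.put(key=image_id, value=image)
        return image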
def _initialize_cache(self):
    cache_config = parse_cache_config_options(self.cache_config)
    log.info('Initializing beaker cache: %s', cache_config)
    self.cache = CacheManager(**cache_config)
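# Illustrative note: _initialize_cache above expects self.cache_config to be a flat
# Beaker options dict; the values here are placeholders.
example_cache_config = {
    'cache.type': 'file',
    'cache.data_dir': '/tmp/cache/data',
    'cache.lock_dir': '/tmp/cache/lock',
    'cache.expire': 300,
}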
import os from beaker.cache import CacheManager from beaker.util import parse_cache_config_options cache = CacheManager(**parse_cache_config_options({ 'cache.type': 'file', 'cache.data_dir': os.path.join(os.getcwd(), 'cache', 'data'), 'cache.lock_dir': os.path.join(os.getcwd(), 'cache', 'lock'), 'expire': '86400', }))
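# Follow-on sketch: the module-level manager above can also memoize functions through
# its decorator; the 'slow_lookup' namespace string keys the cached entries.
@cache.cache('slow_lookup', expire=3600)
def slow_lookup(key):
    # The body runs only on a cache miss for this (namespace, arguments) pair
    return key.upper()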
cache_opts = {
    'cache.data_dir': '/tmp/cache/data',
    'cache.lock_dir': '/tmp/cache/lock'
}

# Create cache regions
cache_regions.update({
    'day_term': {
        'expire': 86400
    },
    'hour_term': {
        'expire': 3600
    }
})

# Create cache for this instance
cache = CacheManager(**parse_cache_config_options(cache_opts))


class APIManager:
    def __init__(self):
        atexit.register(self.__on_exit)
        # Load the bill texts from the persistent cache if it's less than a week old
        self.bill_texts = self.__load_pickle("pickles/billtext_cache.p")
        self.bill_subjects = self.__load_pickle("pickles/billsubject_cache.p")
        # Since I have a limited number of API calls to LegiScan, keep track of the number of calls we're making
        self.legiscan_call_count = 0

    def get_cached_bill_texts(self):
        return self.bill_texts
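# Sketch of consuming the regions declared above via Beaker's region decorator;
# fetch_bill_text and download_bill_text are hypothetical stand-ins for the LegiScan calls.
from beaker.cache import cache_region

@cache_region('day_term', 'bill_text')
def fetch_bill_text(bill_id):
    # Cached in the 'day_term' region (24-hour expiry), keyed by bill_id
    return download_bill_text(bill_id)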
import logging
from random import randint

from beaker.cache import CacheManager
from beaker.util import parse_cache_config_options
from neo4j.v1 import BoltStatementResult
from neo4j.v1 import GraphDatabase, Driver  # noqa: F401

from metadata_service.entity.popular_table import PopularTable
from metadata_service.entity.table_detail import Application, Column, Reader, Source, \
    Statistics, Table, Tag, User, Watermark
from metadata_service.entity.tag_detail import TagDetail
from metadata_service.entity.user_detail import User as UserEntity
from metadata_service.exception import NotFoundException
from metadata_service.proxy.base_proxy import BaseProxy
from metadata_service.proxy.statsd_utilities import timer_with_counter
from metadata_service.util import UserResourceRel

_CACHE = CacheManager(**parse_cache_config_options({'cache.type': 'memory'}))

# Expire cache every 11 hours + jitter
_GET_POPULAR_TABLE_CACHE_EXPIRY_SEC = 11 * 60 * 60 + randint(0, 3600)

LOGGER = logging.getLogger(__name__)


class Neo4jProxy(BaseProxy):
    """
    A proxy to Neo4j (Gateway to Neo4j)
    """

    def __init__(self, *,
                 host: str,
                 port: int,