class ResourceManagerApi(object):
  """Client for the YARN ResourceManager web services REST API."""

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    self._ssl_cert_ca_verify = ssl_cert_ca_verify

    if self._security_enabled:
      self._client.set_kerberos_auth()

    # Fix: always propagate the flag. Previously set_verify(True) was called
    # only when ssl_cert_ca_verify was truthy, so verification could never be
    # explicitly disabled and the False case silently kept the client default.
    # This matches the other API wrappers in this module.
    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "ResourceManagerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def cluster(self, **kwargs):
    """GET cluster-level information; kwargs become query parameters."""
    return self._root.get('cluster', params=kwargs, headers={'Accept': _JSON_CONTENT_TYPE})

  def apps(self, **kwargs):
    """GET the application list; kwargs become query parameters."""
    return self._root.get('cluster/apps', params=kwargs, headers={'Accept': _JSON_CONTENT_TYPE})

  def app(self, app_id):
    """GET a single application by its id."""
    return self._root.get('cluster/apps/%(app_id)s' % {'app_id': app_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def kill(self, app_id):
    """PUT the KILLED state onto the application."""
    return self._root.put('cluster/apps/%(app_id)s/state' % {'app_id': app_id}, data=json.dumps({'state': 'KILLED'}), contenttype=_JSON_CONTENT_TYPE)
class ManagerApi(object):
  """
  https://cloudera.github.io/cm_api/
  """

  def __init__(self, user=None, security_enabled=False, ssl_cert_ca_verify=False):
    self._api_url = '%s/%s' % (MANAGER.API_URL.get().strip('/'), VERSION)
    self._username = get_navigator_auth_username()
    self._password = get_navigator_auth_password()
    self.user = user
    self._client = HttpClient(self._api_url, logger=LOG)

    if security_enabled:
      self._client.set_kerberos_auth()
    else:
      self._client.set_basic_auth(self._username, self._password)
    self._client.set_verify(ssl_cert_ca_verify)

    self._root = Resource(self._client)

  def tools_echo(self):
    """Ping the Cloudera Manager echo endpoint; raises ManagerApiException on REST errors."""
    try:
      params = (('message', 'hello'),)
      LOG.info(params)
      return self._root.get('tools/echo', params=params)
    except RestException as e:  # Fix: Py3-compatible except syntax (was "except RestException, e")
      raise ManagerApiException(e)
class ResourceManagerApi(object):
  """ResourceManager REST wrapper that detects the YARN standby-RM failover page."""

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    if self._security_enabled:
      self._client.set_kerberos_auth()
    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "ResourceManagerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def cluster(self, **kwargs):
    # Generic cluster-info endpoint; kwargs pass straight through as query params.
    accept_json = {'Accept': _JSON_CONTENT_TYPE}
    return self._execute(self._root.get, 'cluster', params=kwargs, headers=accept_json)

  def apps(self, **kwargs):
    accept_json = {'Accept': _JSON_CONTENT_TYPE}
    return self._execute(self._root.get, 'cluster/apps', params=kwargs, headers=accept_json)

  def app(self, app_id):
    path = 'cluster/apps/%(app_id)s' % {'app_id': app_id}
    return self._execute(self._root.get, path, headers={'Accept': _JSON_CONTENT_TYPE})

  def kill(self, app_id):
    path = 'cluster/apps/%(app_id)s/state' % {'app_id': app_id}
    payload = json.dumps({'state': 'KILLED'})
    return self._execute(self._root.put, path, data=payload, contenttype=_JSON_CONTENT_TYPE)

  def _execute(self, function, *args, **kwargs):
    """Invoke ``function`` and surface YARN standby-RM responses as failover errors."""
    result = function(*args, **kwargs)
    # YARN-2605: Yarn does not use proper HTTP redirects when the standby RM has
    # failed back to the master RM.
    standby_marker = 'This is standby RM. Redirecting to the current active RM'
    if isinstance(result, str) and result.startswith(standby_marker):
      raise YarnFailoverOccurred(result)
    return result
class SolrApi(object):
  """
  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """
  def __init__(self, solr_url, user):
    self._url = solr_url
    self._user = user
    self._client = HttpClient(self._url, logger=LOG)
    self.security_enabled = SECURITY_ENABLED.get()
    if self.security_enabled:
      self._client.set_kerberos_auth()
    self._root = resource.Resource(self._client)

  def _get_params(self):
    # With Kerberos the effective user is carried by doAs alone; otherwise the
    # default proxy user name is passed as well.
    if self.security_enabled:
      return (('doAs', self._user ),)
    return (('user.name', DEFAULT_USER), ('doAs', self._user),)

  @classmethod
  def _get_json(cls, response):
    """Coerce a Solr response to a dict, tolerating text payloads with NUL bytes."""
    if type(response) != dict:
      # Got 'plain/text' mimetype instead of 'application/json'
      try:
        response = json.loads(response)
      except ValueError as e:  # Fix: Py3-compatible except syntax (was "except ValueError, e")
        # Got some null bytes in the response
        LOG.error('%s: %s' % (unicode(e), repr(response)))
        response = json.loads(response.replace('\x00', ''))
    return response
class ManagerApi(object):
  """
  https://cloudera.github.io/cm_api/
  """

  def __init__(self, user=None, security_enabled=False, ssl_cert_ca_verify=False):
    self._api_url = '%s/%s' % (MANAGER.API_URL.get().strip('/'), VERSION)
    self._username = get_navigator_auth_username()
    self._password = get_navigator_auth_password()
    self.user = user
    self._client = HttpClient(self._api_url, logger=LOG)

    if security_enabled:
      self._client.set_kerberos_auth()
    else:
      self._client.set_basic_auth(self._username, self._password)
    self._client.set_verify(ssl_cert_ca_verify)

    self._root = Resource(self._client)

  def has_service(self, service_name, cluster_name=None):
    """Return True if ``service_name`` is among the cluster's service types.

    Raises ManagerApiException on any REST failure.
    """
    cluster = self._get_cluster(cluster_name)
    try:
      services = self._root.get('clusters/%(cluster_name)s/serviceTypes' % {
        'cluster_name': cluster['name'],
        'service_name': service_name  # not referenced by the format string; kept for compatibility
      })['items']
      return service_name in services
    except RestException as e:  # Fix: Py3-compatible except syntax (was "except RestException, e")
      raise ManagerApiException(e)
class ResourceManagerApi(object):
  """Read/kill access to YARN applications via the ResourceManager web services."""

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    if self._security_enabled:
      self._client.set_kerberos_auth()
    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "ResourceManagerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def cluster(self, **kwargs):
    # kwargs map 1:1 onto RM query parameters.
    accept = {'Accept': _JSON_CONTENT_TYPE}
    return self._root.get('cluster', params=kwargs, headers=accept)

  def apps(self, **kwargs):
    accept = {'Accept': _JSON_CONTENT_TYPE}
    return self._root.get('cluster/apps', params=kwargs, headers=accept)

  def app(self, app_id):
    target = 'cluster/apps/%(app_id)s' % {'app_id': app_id}
    return self._root.get(target, headers={'Accept': _JSON_CONTENT_TYPE})

  def kill(self, app_id):
    target = 'cluster/apps/%(app_id)s/state' % {'app_id': app_id}
    body = json.dumps({'state': 'KILLED'})
    return self._root.put(target, data=body, contenttype=_JSON_CONTENT_TYPE)
def get_log_client(log_link):
  """Return a cached HttpClient for the host that serves ``log_link``.

  Clients are cached in a module-level min-heap keyed by last-use epoch time
  and looked up by base URL; once MAX_HEAP_SIZE entries exist, the least
  recently used client is evicted. Thread-safe via _log_client_lock.
  """
  global _log_client_queue  # NOTE(review): code below only touches _log_client_heap; this global looks stale — confirm
  global MAX_HEAP_SIZE
  _log_client_lock.acquire()
  try:
    components = urlparse.urlsplit(log_link)
    base_url = '%(scheme)s://%(netloc)s' % {
      'scheme': components[0],
      'netloc': components[1]
    }
    # Takes on form (epoch time, client object)
    # Least Recently Used algorithm.
    client_tuple = next((tup for tup in _log_client_heap if tup[1].base_url == base_url), None)
    if client_tuple is None:
      client = HttpClient(base_url, LOG)
      yarn_cluster = cluster.get_cluster_conf_for_job_submission()
      if yarn_cluster.SECURITY_ENABLED.get():
        client.set_kerberos_auth()
    else:
      # Reuse the cached client; remove its stale heap entry so it can be
      # re-pushed below with a fresh timestamp.
      _log_client_heap.remove(client_tuple)
      client = client_tuple[1]
    new_client_tuple = (time.time(), client)
    if len(_log_client_heap) >= MAX_HEAP_SIZE:
      heapq.heapreplace(_log_client_heap, new_client_tuple)
    else:
      heapq.heappush(_log_client_heap, new_client_tuple)
    return client
  finally:
    _log_client_lock.release()
class ResourceManagerApi(object):
  # NOTE(review): despite the class name, this wraps NodeManager endpoints
  # ('node/containers') and __str__ reports "NodeManagerApi" — confirm the
  # intended class name before renaming (callers may depend on it).
  def __init__(self, oozie_url, security_enabled=False):
    self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if self._security_enabled:
      self._client.set_kerberos_auth()

  def __str__(self):
    return "NodeManagerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def containers(self):
    # Lists all containers currently handled by this node.
    return self._root.get('node/containers', headers={'Accept': _JSON_CONTENT_TYPE})

  def container(self, container_id):
    # Fetches a single container by its id.
    return self._root.get('node/containers/%(container_id)s' % {'container_id': container_id}, headers={'Accept': _JSON_CONTENT_TYPE})
def _query_store_proxy(request, path=None):
  """Forward ``request`` to the query store service, impersonating the Django user.

  Returns the service response on success, or a dict describing the failure
  (HTTP code/reason/body when the backend answered, a generic message when it
  was unreachable). Default payload is {'status': -1}.
  """
  response = {'status': -1}

  headers = {
    'x-do-as': request.user.username,
    'X-Requested-By': 'das',
    'Content-Type': 'application/json; charset=UTF-8'
  }

  client = HttpClient(QUERY_STORE.SERVER_URL.get())
  resource = Resource(client)

  if USE_SASL.get():
    client.set_kerberos_auth()

  try:
    response = resource.invoke(request.method, path, request.GET.dict(), request.body, headers)
  except RestException as e:
    ex_response = e.get_parent_ex().response
    if ex_response is not None:
      response['code'] = ex_response.status_code
      response['message'] = ex_response.reason
      response['content'] = ex_response.text
    else:
      # No HTTP response at all: the service itself is down/unreachable.
      response['message'] = 'Query store not reachable!'
      response['content'] = e.message  # NOTE(review): .message is Py2-only — confirm runtime version

  return response
class ResourceManagerApi(object):
  """Minimal ResourceManager REST wrapper: list applications and fetch one."""

  def __init__(self, oozie_url, security_enabled=False):
    self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    if self._security_enabled:
      self._client.set_kerberos_auth()

  def __str__(self):
    return "ResourceManagerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def apps(self, **kwargs):
    # kwargs are forwarded verbatim as query parameters.
    accept_json = {'Accept': _JSON_CONTENT_TYPE}
    return self._root.get('cluster/apps', params=kwargs, headers=accept_json)

  def app(self, app_id):
    target = 'cluster/apps/%(app_id)s' % {'app_id': app_id}
    return self._root.get(target, headers={'Accept': _JSON_CONTENT_TYPE})
def get_log_client(log_link):
  """Return a cached, per-host HttpClient for fetching logs at ``log_link``.

  A module-level heap of (last-use epoch time, client) tuples implements LRU
  eviction once MAX_HEAP_SIZE clients exist; lookups match on base URL.
  Guarded by _log_client_lock for thread safety.
  """
  global _log_client_queue  # NOTE(review): only _log_client_heap is used below; this global looks stale — confirm
  global MAX_HEAP_SIZE
  _log_client_lock.acquire()
  try:
    components = urlparse.urlsplit(log_link)
    base_url = '%(scheme)s://%(netloc)s' % {
      'scheme': components[0],
      'netloc': components[1]
    }
    # Takes on form (epoch time, client object)
    # Least Recently Used algorithm.
    client_tuple = next(
      (tup for tup in _log_client_heap if tup[1].base_url == base_url), None)
    if client_tuple is None:
      client = HttpClient(base_url, LOG)
      yarn_cluster = cluster.get_cluster_conf_for_job_submission()
      if yarn_cluster.SECURITY_ENABLED.get():
        client.set_kerberos_auth()
    else:
      # Cache hit: pull the stale entry so the client is re-queued with a
      # fresh timestamp below.
      _log_client_heap.remove(client_tuple)
      client = client_tuple[1]
    new_client_tuple = (time.time(), client)
    if len(_log_client_heap) >= MAX_HEAP_SIZE:
      heapq.heapreplace(_log_client_heap, new_client_tuple)
    else:
      heapq.heappush(_log_client_heap, new_client_tuple)
    return client
  finally:
    _log_client_lock.release()
class NodeManagerApi(object):
  """Access container information exposed by a YARN NodeManager."""

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=True):
    self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    if self._security_enabled:
      self._client.set_kerberos_auth()
    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "NodeManagerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def containers(self):
    # All containers currently running on this node.
    accept = {'Accept': _JSON_CONTENT_TYPE}
    return self._root.get('node/containers', headers=accept)

  def container(self, container_id):
    target = 'node/containers/%(container_id)s' % {'container_id': container_id}
    return self._root.get(target, headers={'Accept': _JSON_CONTENT_TYPE})
class ResourceManagerApi(object):
  # NOTE(review): the endpoints below ("node/containers") and __str__ belong to
  # the NodeManager web services, not the ResourceManager — confirm the class
  # name before changing it (callers may depend on it).
  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=True):
    self._url = posixpath.join(oozie_url, "ws", _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "NodeManagerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def containers(self):
    # Lists all containers currently handled by this node.
    return self._root.get("node/containers", headers={"Accept": _JSON_CONTENT_TYPE})

  def container(self, container_id):
    # Fetches a single container by its id.
    return self._root.get(
      "node/containers/%(container_id)s" % {"container_id": container_id}, headers={"Accept": _JSON_CONTENT_TYPE}
    )
class SolrApi(object):
  """
  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """

  def __init__(self, solr_url, user):
    self._url = solr_url
    self._user = user
    self._client = HttpClient(self._url, logger=LOG)
    self.security_enabled = SECURITY_ENABLED.get()
    if self.security_enabled:
      self._client.set_kerberos_auth()
    self._root = resource.Resource(self._client)

  def _get_params(self):
    # Kerberos carries the effective user via doAs alone; otherwise the
    # default proxy user is also passed.
    if self.security_enabled:
      return (('doAs', self._user), )
    return (
      ('user.name', DEFAULT_USER),
      ('doAs', self._user),
    )

  @classmethod
  def _get_json(cls, response):
    """Coerce a Solr response into a dict, tolerating text payloads with NUL bytes."""
    if type(response) != dict:
      # Got 'plain/text' mimetype instead of 'application/json'
      try:
        response = json.loads(response)
      except ValueError as e:  # Fix: Py3-compatible except syntax (was "except ValueError, e")
        # Got some null bytes in the response
        LOG.error('%s: %s' % (unicode(e), repr(response)))
        response = json.loads(response.replace('\x00', ''))
    return response
class SolrApi(object):
  """
  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """

  def __init__(self, solr_url):
    self._url = solr_url
    self._client = HttpClient(self._url, logger=LOG)
    if SECURITY_ENABLED.get():
      self._client.set_kerberos_auth()
    self._root = Resource(self._client)

  def query(self, solr_query, hue_core):
    """Run a select query against the collection named in ``solr_query``.

    Returns the decoded JSON response; raises PopupException on REST errors.
    """
    try:
      params = (
        ('q', solr_query['q'] or EMPTY_QUERY.get()),
        ('wt', 'json'),
        ('rows', solr_query['rows']),
        ('start', solr_query['start']),
      )
      params += hue_core.get_query(solr_query)

      # 'fq' is a |-separated list of filter queries; empty segments are dropped.
      fqs = solr_query['fq'].split('|')
      for fq in fqs:
        if fq:
          params += (('fq', fq),)

      response = self._root.get('%(collection)s/select' % solr_query, params)
      if type(response) != dict:
        # Got 'plain/text' mimetype instead of 'application/json'
        response = json.loads(response)
      return response
    except RestException as e:  # Fix: Py3-compatible except syntax (was "except RestException, e")
      raise PopupException('Error while accessing Solr: %s' % e)
def query_store_api(request, path=None):
  """Serve query-store API calls: proxy to the backend when enabled, otherwise
  answer the search endpoint locally from the jobs API.

  Returns a JsonResponse; the default payload is {'status': -1}.
  """
  response = {'status': -1}

  if USE_PROXY.get():
    content_type = 'application/json; charset=UTF-8'
    headers = {'X-Requested-By': 'das', 'Content-Type': content_type}
    client = HttpClient(QUERY_STORE.SERVER_URL.get())
    resource = Resource(client)
    if USE_SASL.get():
      client.set_kerberos_auth()
    try:
      response = resource.invoke(request.method, path, request.GET.dict(), request.body, headers)
    except RestException as e:
      # NOTE(review): unlike the proxy helper variant, this assumes a parent
      # HTTP response always exists (no None check) — confirm.
      ex_response = e.get_parent_ex().response
      response['code'] = ex_response.status_code
      response['message'] = ex_response.reason
      response['content'] = ex_response.text
  else:
    # Non-proxy mode: only the search endpoint is supported, backed by the
    # local queries-hive interface.
    if path == 'api/query/search':
      filters = json.loads(request.body)
      resp = get_api(request.user, interface='queries-hive').apps(filters['search'])
      response = resp['apps']

  return JsonResponse(response)
class THttpClient(TTransportBase): """ HTTP transport mode for Thrift. HTTPS and Kerberos support with Request. e.g. mode = THttpClient('http://hbase-thrift-v1.com:9090') mode = THttpClient('http://hive-localhost:10001/cliservice') """ def __init__(self, base_url): self._base_url = base_url self._client = HttpClient(self._base_url, logger=LOG) self._data = None self._headers = None self._wbuf = buffer_writer() def open(self): pass def set_kerberos_auth(self, service="HTTP"): self._client.set_kerberos_auth(service=service) def set_basic_auth(self, username, password): self._client.set_basic_auth(username, password) def set_bearer_auth(self, token): self._client.set_bearer_auth(token) def set_verify(self, verify=True): self._client.set_verify(verify) def close(self): self._headers = None # Close session too? def isOpen(self): return self._client is not None def setTimeout(self, ms): if not self._headers: self._headers = {} self._headers.update(timeout=str(int(ms / 1000))) def setCustomHeaders(self, headers): self._headers = headers def read(self, sz): return self._data def write(self, buf): self._wbuf.write(buf) def flush(self): data = self._wbuf.getvalue() self._wbuf = buffer_writer() # POST self._root = Resource(self._client) self._data = self._root.post('', data=data, headers=self._headers)
class SparkHistoryServerApi(object):
  """Read-only client for the Spark History Server REST API."""

  def __init__(self, spark_hs_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._ui_url = spark_hs_url
    self._url = posixpath.join(spark_hs_url, 'api/%s/' % _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    if self._security_enabled:
      self._client.set_kerberos_auth()
    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "Spark History Server API at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def ui_url(self):
    return self._ui_url

  @property
  def headers(self):
    return {'Accept': _JSON_CONTENT_TYPE}

  def applications(self):
    return self._root.get('applications', headers=self.headers)

  def application(self, app_id):
    endpoint = 'applications/%(app_id)s' % {'app_id': app_id}
    return self._root.get(endpoint, headers=self.headers)

  def jobs(self, app_id, attempt_id):
    endpoint = 'applications/%(app_id)s/%(attempt_id)s/jobs' % {'app_id': app_id, 'attempt_id': attempt_id}
    return self._root.get(endpoint, headers=self.headers)

  def stages(self, app_id, attempt_id):
    endpoint = 'applications/%(app_id)s/%(attempt_id)s/stages' % {'app_id': app_id, 'attempt_id': attempt_id}
    return self._root.get(endpoint, headers=self.headers)

  def executors(self, app_id, attempt_id):
    endpoint = 'applications/%(app_id)s/%(attempt_id)s/executors' % {'app_id': app_id, 'attempt_id': attempt_id}
    return self._root.get(endpoint, headers=self.headers)
class THttpClient(TTransportBase): """ HTTP transport mode for Thrift. HTTPS and Kerberos support with Request. e.g. mode = THttpClient('http://hbase-thrift-v1.com:9090') mode = THttpClient('http://hive-localhost:10001/cliservice') """ def __init__(self, base_url, cert_validate=True): self._base_url = base_url self._client = HttpClient(self._base_url, logger=LOG, cert_validate=cert_validate) self._data = None self._headers = None self._wbuf = StringIO() def open(self): pass def set_basic_auth(self, username, password): self._client.set_basic_auth(username, password) def set_kerberos_auth(self): self._client.set_kerberos_auth() def close(self): self._headers = None # Close session too? def isOpen(self): return self._client is not None def setTimeout(self, ms): pass def setCustomHeaders(self, headers): self._headers = headers def read(self, sz): return self._data def write(self, buf): self._wbuf.write(buf) def flush(self): if self.isOpen(): self.close() self.open() data = self._wbuf.getvalue() self._wbuf = StringIO() # POST self._root = Resource(self._client) self._data = self._root.post('', data=data)
class THttpClient(TTransportBase): """ HTTP transport mode for Thrift. HTTPS and Kerberos support with Request. e.g. mode = THttpClient('http://hbase-thrift-v1.com:9090') mode = THttpClient('http://hive-localhost:10001/cliservice') """ def __init__(self, base_url, cert_validate=True): self._base_url = base_url self._client = HttpClient(self._base_url, logger=LOG, cert_validate=cert_validate) self._data = None self._headers = None self._wbuf = StringIO() def open(self): pass def set_basic_auth(self, username, password): self._client.set_basic_auth(username, password) def set_kerberos_auth(self): self._client.set_kerberos_auth() def close(self): self._headers = None # Close session too? def isOpen(self): return self._client is not None def setTimeout(self, ms): pass def setCustomHeaders(self, headers): self._headers = headers def read(self, sz): return self._data def write(self, buf): self._wbuf.write(buf) def flush(self): if self.isOpen(): self.close() self.open() data = self._wbuf.getvalue() self._wbuf = StringIO() # POST self._root = Resource(self._client) self._data = self._root.post('', data=data)
class MapreduceApi(object):
  """Fetch MapReduce job details through the ResourceManager proxy endpoint.

  The proxy is addressed by application id ('application_...') while the MR
  REST path segments keep the job id ('job_...'); hence the replace() calls.
  """

  def __init__(self, oozie_url, security_enabled=False):
    self._url = posixpath.join(oozie_url, 'proxy')
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if self._security_enabled:
      self._client.set_kerberos_auth()

  def __str__(self):
    return "MapreduceApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  def job(self, user, job_id):
    # `user` is accepted for interface parity but not used in the request.
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, headers={'Accept': _JSON_CONTENT_TYPE})

  def counters(self, job_id):
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/counters' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, headers={'Accept': _JSON_CONTENT_TYPE})

  def tasks(self, job_id):
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, headers={'Accept': _JSON_CONTENT_TYPE})

  def job_attempts(self, job_id):
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/jobattempts' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, headers={'Accept': _JSON_CONTENT_TYPE})

  def conf(self, job_id):
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/conf' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, headers={'Accept': _JSON_CONTENT_TYPE})

  def task(self, job_id, task_id):
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s' % {'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'version': _API_VERSION},
                          headers={'Accept': _JSON_CONTENT_TYPE})

  def task_counters(self, job_id, task_id):
    app_id = job_id.replace('job', 'application')
    # NOTE(review): only this method and task_attempt() normalize an
    # application id back to a job id; the other methods do not — confirm
    # whether the inconsistency is intentional.
    job_id = job_id.replace('application', 'job')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/counters' % {'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'version': _API_VERSION}, headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempts(self, job_id, task_id):
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts' % {'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'version': _API_VERSION}, headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempt(self, job_id, task_id, attempt_id):
    app_id = job_id.replace('job', 'application')
    job_id = job_id.replace('application', 'job')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s' % {'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id, 'version': _API_VERSION}, headers={'Accept': _JSON_CONTENT_TYPE})
class SolrApi(object):
  """
  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """

  def __init__(self, solr_url, user):
    self._url = solr_url
    self._user = user
    self._client = HttpClient(self._url, logger=LOG)
    self.security_enabled = SECURITY_ENABLED.get()
    if self.security_enabled:
      self._client.set_kerberos_auth()
    self._root = resource.Resource(self._client)

  def _get_params(self):
    # Kerberos carries the effective user via doAs alone; otherwise the
    # default proxy user name is passed as well.
    if self.security_enabled:
      return (('doAs', self._user), )
    return (
      ('user.name', DEFAULT_USER),
      ('doAs', self._user),
    )

  def query(self, solr_query, hue_core):
    """Run a select query against the collection named in ``solr_query``.

    Returns the decoded JSON response; raises PopupException on REST errors.
    """
    try:
      params = self._get_params() + (
        ('q', solr_query['q'] or EMPTY_QUERY.get()),
        ('wt', 'json'),
        ('rows', solr_query['rows']),
        ('start', solr_query['start']),
      )
      params += hue_core.get_query(solr_query)

      # 'fq' is a |-separated list of filter queries; empty segments are dropped.
      fqs = solr_query['fq'].split('|')
      for fq in fqs:
        if fq:
          params += (('fq', fq), )

      response = self._root.get('%(collection)s/select' % solr_query, params)
      if type(response) != dict:
        # Got 'plain/text' mimetype instead of 'application/json'
        try:
          response = json.loads(response)
        except ValueError as e:  # Fix: Py3-compatible except syntax (was "except ValueError, e")
          # Got some null bytes in the response
          LOG.error('%s: %s' % (unicode(e), repr(response)))
          response = json.loads(response.replace('\x00', ''))
      return response
    except RestException as e:  # Fix: Py3-compatible except syntax (was "except RestException, e")
      raise PopupException(e, title=_('Error while accessing Solr'))
def query_store_download_bundle(request, id=None):
  """Fetch a query-store data bundle and return it as a zip attachment."""
  response = {}
  client = HttpClient(QUERY_STORE.SERVER_URL.get())
  resource = Resource(client)

  if USE_SASL.get():
    client.set_kerberos_auth()

  app = resource.get('api/data-bundle/' + id)
  # NOTE(review): FileResponse is given the tuple (app, 'rb') rather than an
  # open file-like object — this looks wrong (resembles a mangled open()
  # call); confirm against a working deployment before changing.
  response = FileResponse((app, 'rb'), content_type='application/octet-stream')
  response['Content-Disposition'] = 'attachment; filename=' + id + '.zip'
  return response
class HistoryServerApi(object):
  """Client for the MapReduce JobHistory Server REST API (ws/<version>/history)."""

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(oozie_url, 'ws/%s/history' % _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "HistoryServerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  def job(self, user, job_id):
    # `user` is accepted for interface parity but not used in the request.
    return self._root.get('mapreduce/jobs/%(job_id)s' % {'job_id': job_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def counters(self, job_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/counters' % {'job_id': job_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def conf(self, job_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/conf' % {'job_id': job_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def job_attempts(self, job_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/jobattempts' % {'job_id': job_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def tasks(self, job_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/tasks' % {'job_id': job_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def task(self, job_id, task_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s' % {'job_id': job_id, 'task_id': task_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempts(self, job_id, task_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts' % {'job_id': job_id, 'task_id': task_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def task_counters(self, job_id, task_id):
    # NOTE(review): only this method normalizes an application id back to a
    # job id; the sibling methods do not — confirm whether that is intentional.
    job_id = job_id.replace('application', 'job')
    return self._root.get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/counters' % {'job_id': job_id, 'task_id': task_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempt(self, job_id, task_id, attempt_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s' % {'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempt_counters(self, job_id, task_id, attempt_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s/counters' % {'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id}, headers={'Accept': _JSON_CONTENT_TYPE})
def _create_query_store_client(request, content_type='application/json; charset=UTF-8'):
  """Build an HttpClient for the query store, impersonating the requesting user."""
  impersonation_headers = {
    'x-do-as': request.user.username,
    'X-Requested-By': 'das',
    'Content-Type': content_type,
  }

  client = HttpClient(QUERY_STORE.SERVER_URL.get())
  client.set_headers(impersonation_headers)

  if USE_SASL.get():
    client.set_kerberos_auth()

  return client
class SolrApi(object):
  """
  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """
  def __init__(self, solr_url, user):
    self._url = solr_url
    self._user = user
    self._client = HttpClient(self._url, logger=LOG)
    self.security_enabled = SECURITY_ENABLED.get()
    if self.security_enabled:
      self._client.set_kerberos_auth()
    self._root = Resource(self._client)

  def _get_params(self):
    # Kerberos carries the effective user via doAs alone; otherwise the
    # default proxy user name is passed as well.
    if self.security_enabled:
      return (('doAs', self._user ),)
    return (('user.name', DEFAULT_USER), ('doAs', self._user),)

  def query(self, solr_query, hue_core):
    """Run a select query against the collection named in ``solr_query``.

    Returns the decoded JSON response; raises PopupException on REST errors.
    """
    try:
      params = self._get_params() + (
        ('q', solr_query['q'] or EMPTY_QUERY.get()),
        ('wt', 'json'),
        ('rows', solr_query['rows']),
        ('start', solr_query['start']),
      )
      params += hue_core.get_query(solr_query)

      # 'fq' is a |-separated list of filter queries; empty segments are dropped.
      fqs = solr_query['fq'].split('|')
      for fq in fqs:
        if fq:
          params += (('fq', fq),)

      response = self._root.get('%(collection)s/select' % solr_query, params)
      if type(response) != dict:
        # Got 'plain/text' mimetype instead of 'application/json'
        try:
          response = json.loads(response)
        except ValueError as e:  # Fix: Py3-compatible except syntax (was "except ValueError, e")
          # Got some null bytes in the response
          LOG.error('%s: %s' % (unicode(e), repr(response)))
          response = json.loads(response.replace('\x00', ''))
      return response
    except RestException as e:  # Fix: Py3-compatible except syntax (was "except RestException, e")
      raise PopupException(e, title=_('Error while accessing Solr'))
class ResourceManagerApi(object):
  """ResourceManager REST wrapper; every call goes through a failover check."""

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    if self._security_enabled:
      self._client.set_kerberos_auth()
    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "ResourceManagerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def cluster(self, **kwargs):
    return self._execute(self._root.get, 'cluster', params=kwargs, headers={'Accept': _JSON_CONTENT_TYPE})

  def apps(self, **kwargs):
    return self._execute(self._root.get, 'cluster/apps', params=kwargs, headers={'Accept': _JSON_CONTENT_TYPE})

  def app(self, app_id):
    app_path = 'cluster/apps/%(app_id)s' % {'app_id': app_id}
    return self._execute(self._root.get, app_path, headers={'Accept': _JSON_CONTENT_TYPE})

  def kill(self, app_id):
    state_path = 'cluster/apps/%(app_id)s/state' % {'app_id': app_id}
    return self._execute(self._root.put, state_path, data=json.dumps({'state': 'KILLED'}), contenttype=_JSON_CONTENT_TYPE)

  def _execute(self, function, *args, **kwargs):
    """Call ``function`` and raise if the standby RM answered instead of redirecting."""
    response = function(*args, **kwargs)
    # YARN-2605: Yarn does not use proper HTTP redirects when the standby RM has
    # failed back to the master RM.
    failed_over = isinstance(response, str) and response.startswith('This is standby RM. Redirecting to the current active RM')
    if failed_over:
      raise YarnFailoverOccurred(response)
    return response
class SparkHistoryServerApi(object):
  """Thin wrapper over the Spark History Server REST endpoints."""

  def __init__(self, spark_hs_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._ui_url = spark_hs_url
    self._url = posixpath.join(spark_hs_url, 'api/%s/' % _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    if self._security_enabled:
      self._client.set_kerberos_auth()
    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "Spark History Server API at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def ui_url(self):
    return self._ui_url

  @property
  def headers(self):
    # All endpoints request JSON.
    return {'Accept': _JSON_CONTENT_TYPE}

  def applications(self):
    return self._root.get('applications', headers=self.headers)

  def application(self, app_id):
    return self._root.get('applications/%(app_id)s' % {'app_id': app_id}, headers=self.headers)

  def jobs(self, app_id, attempt_id):
    fmt = {'app_id': app_id, 'attempt_id': attempt_id}
    return self._root.get('applications/%(app_id)s/%(attempt_id)s/jobs' % fmt, headers=self.headers)

  def stages(self, app_id, attempt_id):
    fmt = {'app_id': app_id, 'attempt_id': attempt_id}
    return self._root.get('applications/%(app_id)s/%(attempt_id)s/stages' % fmt, headers=self.headers)

  def executors(self, app_id, attempt_id):
    fmt = {'app_id': app_id, 'attempt_id': attempt_id}
    return self._root.get('applications/%(app_id)s/%(attempt_id)s/executors' % fmt, headers=self.headers)
class SparkJob(Application):
  """Spark application wrapper that resolves its live tracking URL and, for
  apps past submission, pulls metrics from the Spark History Server."""

  def __init__(self, job, rm_api=None, hs_api=None):
    super(SparkJob, self).__init__(job, rm_api)
    self._resolve_tracking_url()
    # History-server metrics only exist once the app has left the submission states.
    if self.status not in ('NEW', 'SUBMITTED', 'ACCEPTED') and hs_api:
      self.history_server_api = hs_api
      self._get_metrics()

  @property
  def logs_url(self):
    log_links = self.history_server_api.get_executors_loglinks(self)
    return log_links[ 'stdout'] if log_links and 'stdout' in log_links else ''

  @property
  def attempt_id(self):
    # Last path segment of the tracking URL.
    return self.trackingUrl.strip('/').split('/')[-1]

  def _resolve_tracking_url(self):
    # Follow the RM redirect to find the application's actual UI URL.
    resp = None
    try:
      self._client = HttpClient(self.trackingUrl, logger=LOG)
      self._root = Resource(self._client)
      yarn_cluster = cluster.get_cluster_conf_for_job_submission()
      self._security_enabled = yarn_cluster.SECURITY_ENABLED.get()
      if self._security_enabled:
        self._client.set_kerberos_auth()

      self._client.set_verify(yarn_cluster.SSL_CERT_CA_VERIFY.get())
      actual_url = self._execute(self._root.resolve_redirect_url)

      # Strip a trailing 'jobs' segment so the URL points at the app root.
      if actual_url.strip('/').split('/')[-1] == 'jobs':
        actual_url = actual_url.strip('/').replace('jobs', '')
      self.trackingUrl = actual_url
      LOG.debug("SparkJob tracking URL: %s" % self.trackingUrl)
    except Exception, e:
      LOG.warn("Failed to resolve Spark Job's actual tracking URL: %s" % e)
    finally:
      # NOTE(review): this snippet is truncated here — the finally body is not visible.
class SparkJob(Application):
  """A YARN application wrapper with Spark-specific metadata.

  Resolves the real Spark tracking URL at construction time and, once the
  job is past the ACCEPTED state and a history server API is supplied,
  pulls metrics from it.
  """

  def __init__(self, job, rm_api=None, hs_api=None):
    super(SparkJob, self).__init__(job, rm_api)
    self._resolve_tracking_url()
    # Metrics only exist once the app has actually started running.
    if self.status not in ('NEW', 'SUBMITTED', 'ACCEPTED') and hs_api:
      self.history_server_api = hs_api
      self._get_metrics()

  @property
  def logs_url(self):
    # Driver stdout link, or '' when no executor log links are available.
    log_links = self.history_server_api.get_executors_loglinks(self)
    return log_links['stdout'] if log_links and 'stdout' in log_links else ''

  @property
  def attempt_id(self):
    # Last path segment of the tracking URL — presumably the attempt id;
    # TODO confirm the URL always ends with it.
    return self.trackingUrl.strip('/').split('/')[-1]

  def _resolve_tracking_url(self):
    """Follow redirects on trackingUrl to find the real Spark UI URL."""
    resp = None
    try:
      self._client = HttpClient(self.trackingUrl, logger=LOG)
      self._root = Resource(self._client)
      yarn_cluster = cluster.get_cluster_conf_for_job_submission()
      self._security_enabled = yarn_cluster.SECURITY_ENABLED.get()
      if self._security_enabled:
        self._client.set_kerberos_auth()

      self._client.set_verify(yarn_cluster.SSL_CERT_CA_VERIFY.get())

      actual_url = self._execute(self._root.resolve_redirect_url)
      # A URL ending in /jobs points at the UI's jobs page; keep only the root.
      if actual_url.strip('/').split('/')[-1] == 'jobs':
        actual_url = actual_url.strip('/').replace('jobs', '')
      self.trackingUrl = actual_url
      LOG.debug("SparkJob tracking URL: %s" % self.trackingUrl)
    except Exception, e:
      # Best-effort: keep the original trackingUrl on failure.
      LOG.warn("Failed to resolve Spark Job's actual tracking URL: %s" % e)
    finally:
      # NOTE(review): the source chunk is truncated here — the finally body
      # is missing from this view.
class SolrApi(object):
  """
  Minimal Solr client used to run a select query against a core.

  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """

  def __init__(self, solr_url):
    self._url = solr_url
    self._client = HttpClient(self._url, logger=LOG)
    if SECURITY_ENABLED.get():
      self._client.set_kerberos_auth()
    self._root = Resource(self._client)

  def query(self, solr_query, hue_core):
    """Run a select query built from `solr_query` plus the core's own params.

    Raises PopupException on any REST error.
    """
    try:
      params = (
        ('q', solr_query['q'] or EMPTY_QUERY.get()),
        ('wt', 'json'),
        ('rows', solr_query['rows']),
        ('start', solr_query['start']),
      )
      params += hue_core.get_query(solr_query)

      # 'fq' is a '|'-separated list of filter queries; skip empty entries.
      fqs = solr_query['fq'].split('|')
      for fq in fqs:
        if fq:
          params += (('fq', fq),)

      response = self._root.get('%(collection)s/select' % solr_query, params)

      # Was `type(response) != dict`; isinstance is the idiomatic (and
      # subclass-safe) check.
      if not isinstance(response, dict):
        # Got 'plain/text' mimetype instead of 'application/json'
        response = json.loads(response)
      return response
    except RestException as e:
      raise PopupException('Error while accessing Solr: %s' % e)
class SparkHistoryServerApi(object):
  """REST client for the Spark History Server monitoring API.

  Wraps the api/<version>/ endpoints and adds helpers to fetch executor
  logs by scraping the node manager's log web pages.
  """

  def __init__(self, spark_hs_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._ui_url = spark_hs_url
    self._url = posixpath.join(spark_hs_url, 'api/%s/' % _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "Spark History Server API at %s" % (self._url,)

  @property
  def url(self):
    # REST base URL, including the api/<version>/ suffix.
    return self._url

  @property
  def ui_url(self):
    # Web UI base URL as configured, without the api suffix.
    return self._ui_url

  @property
  def headers(self):
    return {'Accept': _JSON_CONTENT_TYPE}

  def applications(self):
    return self._root.get('applications', headers=self.headers)

  def application(self, app_id):
    return self._root.get('applications/%(app_id)s' % {'app_id': app_id}, headers=self.headers)

  def jobs(self, app_id):
    return self._root.get('applications/%(app_id)s/jobs' % {'app_id': app_id}, headers=self.headers)

  def stages(self, app_id):
    return self._root.get('applications/%(app_id)s/stages' % {'app_id': app_id}, headers=self.headers)

  def executors(self, job):
    # Takes a job object (not a plain app id): the real [app-id]/[attempt-id]
    # path segment must be resolved first via get_real_app_id().
    LOG.debug("Getting executors for Spark job %s" % job.jobId)
    app_id = self.get_real_app_id(job)
    if not app_id:
      return []

    return self._root.get('applications/%(app_id)s/executors' % {'app_id': app_id}, headers=self.headers)

  def stage_attempts(self, app_id, stage_id):
    return self._root.get('applications/%(app_id)s/stages/%(stage_id)s' % {'app_id': app_id, 'stage_id': stage_id}, headers=self.headers)

  def stage_attempt(self, app_id, stage_id, stage_attempt_id):
    return self._root.get('applications/%(app_id)s/stages/%(stage_id)s/%(stage_attempt_id)s' %
                          {'app_id': app_id, 'stage_id': stage_id, 'stage_attempt_id': stage_attempt_id}, headers=self.headers)

  def task_summary(self, app_id, stage_id, stage_attempt_id):
    return self._root.get('applications/%(app_id)s/stages/%(stage_id)s/%(stage_attempt_id)s/taskSummary' %
                          {'app_id': app_id, 'stage_id': stage_id, 'stage_attempt_id': stage_attempt_id}, headers=self.headers)

  def task_list(self, app_id, stage_id, stage_attempt_id):
    return self._root.get('applications/%(app_id)s/stages/%(stage_id)s/%(stage_attempt_id)s/taskList' %
                          {'app_id': app_id, 'stage_id': stage_id, 'stage_attempt_id': stage_attempt_id}, headers=self.headers)

  def storages(self, app_id):
    return self._root.get('applications/%(app_id)s/storage/rdd' % {'app_id': app_id}, headers=self.headers)

  def storage(self, app_id, rdd_id):
    return self._root.get('applications/%(app_id)s/storage/rdd/%(rdd_id)s' % {'app_id': app_id, 'rdd_id': rdd_id}, headers=self.headers)

  def download_logs(self, app_id):
    return self._root.get('applications/%(app_id)s/logs' % {'app_id': app_id}, headers=self.headers)

  def download_attempt_logs(self, app_id, attempt_id):
    return self._root.get('applications/%(app_id)s/%(attempt_id)s/logs' % {'app_id': app_id, 'attempt_id': attempt_id}, headers=self.headers)

  def download_executors_logs(self, request, job, name, offset):
    # Fetch the driver executor's log of the given name on behalf of the request user.
    log_links = self.get_executors_loglinks(job)
    return self.retrieve_log_content(log_links, name, request.user.username, offset)

  def download_executor_logs(self, user, executor, name, offset):
    return self.retrieve_log_content(executor['logs'], name, user.username, offset)

  def retrieve_log_content(self, log_links, log_name, username, offset):
    """Scrape one log file from the node manager's HTML log page.

    Any log name other than 'stderr' is normalized to 'stdout'.
    Returns '' when no matching link exists.
    """
    params = {
      'doAs': username
    }
    if offset != 0:
      params['start'] = offset

    if not log_name or not log_name == 'stderr':
      log_name = 'stdout'

    log = ''
    if log_links and log_name in log_links:
      log_link = log_links[log_name]
      # The log endpoint serves HTML, not JSON: the log text lives in the
      # second table cell of the page body.
      root = Resource(get_log_client(log_link), lib_urlsplit(log_link)[2], urlencode=False)
      response = root.get('', params=params)
      log = html.fromstring(response, parser=html.HTMLParser()).xpath('/html/body/table/tbody/tr/td[2]')[0].text_content()
    return log

  def get_executors_loglinks(self, job):
    # Prefer the driver executor's log links; fall back to the first executor.
    # NOTE(review): executor rows look like positional tuples — index 0 the
    # executor id, index 12 the log links; confirm against the metrics producer.
    executor = None
    if job.metrics and 'executors' in job.metrics and job.metrics['executors']:
      executors = [executor for executor in job.metrics['executors'] if executor[0] == 'driver']  # look up driver executor
      if not executors:
        executor = job.metrics['executors'][0]
      else:
        executor = executors[0]
    return None if not executor else executor[12]

  def get_real_app_id(self, job):
    # https://spark.apache.org/docs/1.6.0/monitoring.html and https://spark.apache.org/docs/2.0.0/monitoring.html
    # When running on Yarn, each application has multiple attempts, so [app-id] is actually [app-id]/[attempt-id] in all cases.
    # When running job as cluster mode, an attempt number is part of application ID, but proxy URL can't be resolved to match
    # Spark history URL. In the applications list, each job's attampt list shows if attempt ID is used and how many attempts.
    try:
      jobs_json = self.applications()
      job_filtered_json = [x for x in jobs_json if x['id'] == job.jobId]
      if not job_filtered_json:
        return {}
      attempts = job_filtered_json[0]['attempts']
      if len(attempts) == 1:
        app_id = job.jobId if 'attemptId' not in attempts[0] else job.jobId + '/' + attempts[0]['attemptId']
      else:
        app_id = job.jobId + '/%d' % len(attempts)
      LOG.debug("Getting real spark app id %s for Spark job %s" % (app_id, job.jobId))
    except Exception as e:
      # Best-effort: callers treat a falsy app id as "no executors".
      LOG.error('Cannot get real app id %s: %s' % (job.jobId, e))
      app_id = None
    return app_id
class SolrApi(object):
  """
  Solr client used by the dashboards: builds facet-heavy select queries
  and suggestions.

  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """

  def __init__(
      self,
      solr_url,
      user,
      # NOTE(review): these defaults are evaluated once at import time by
      # design of the original code; kept as-is for compatibility.
      security_enabled=SECURITY_ENABLED.get() if search_enabled() else SECURITY_ENABLED.default,
      ssl_cert_ca_verify=SSL_CERT_CA_VERIFY.get(),
  ):
    self._url = solr_url
    self._user = user
    self._client = HttpClient(self._url, logger=LOG)
    self.security_enabled = security_enabled

    if self.security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

    self._root = resource.Resource(self._client)

    # The Kerberos handshake requires two requests in order to authenticate,
    # but if our first request is a PUT/POST, it might flat-out reject the
    # first request if the body is too large. So, connect here in order to get
    # a cookie so future PUT/POSTs will be pre-authenticated.
    if self.security_enabled:
      self._root.invoke("HEAD", "/")

  def query(self, collection, query):
    """Run a select query for `collection`, translating its widgets/facets
    into classic facet params and the JSON Facet API. Returns parsed JSON."""
    solr_query = {}
    solr_query["collection"] = collection["name"]

    if query.get("download"):
      solr_query["rows"] = 1000
      solr_query["start"] = 0
    else:
      solr_query["rows"] = int(collection["template"]["rows"] or 10)
      solr_query["start"] = int(query["start"])

    # Hard caps to protect the server.
    solr_query["rows"] = min(solr_query["rows"], 1000)
    solr_query["start"] = min(solr_query["start"], 10000)

    params = self._get_params() + (
      ("q", self._get_q(query)),
      ("wt", "json"),
      ("rows", solr_query["rows"]),
      ("start", solr_query["start"]),
    )

    if any(collection["facets"]):
      params += (("facet", "true"), ("facet.mincount", 0), ("facet.limit", 10))
      json_facets = {}

      timeFilter = self._get_range_borders(collection, query)

      for facet in collection["facets"]:
        if facet["type"] == "query":
          params += (("facet.query", "%s" % facet["field"]),)
        elif facet["type"] == "range" or facet["type"] == "range-up":
          keys = {
            "id": "%(id)s" % facet,
            "field": facet["field"],
            "key": "%(field)s-%(id)s" % facet,
            "start": facet["properties"]["start"],
            "end": facet["properties"]["end"],
            "gap": facet["properties"]["gap"],
            "mincount": int(facet["properties"]["mincount"]),
          }

          if (
            timeFilter
            and timeFilter["time_field"] == facet["field"]
            and (
              facet["id"] not in timeFilter["time_filter_overrides"]
              or facet["widgetType"] != "histogram-widget"
            )
          ):
            keys.update(self._get_time_filter_query(timeFilter, facet))

          params += (
            (
              "facet.range",
              "{!key=%(key)s ex=%(id)s f.%(field)s.facet.range.start=%(start)s f.%(field)s.facet.range.end=%(end)s f.%(field)s.facet.range.gap=%(gap)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s"
              % keys,
            ),
          )
        elif facet["type"] == "field":
          keys = {
            "id": "%(id)s" % facet,
            "field": facet["field"],
            "key": "%(field)s-%(id)s" % facet,
            # One extra row so the widget can tell whether more values exist.
            "limit": int(facet["properties"].get("limit", 10)) + (1 if facet["widgetType"] == "facet-widget" else 0),
            "mincount": int(facet["properties"]["mincount"]),
          }

          params += (
            (
              "facet.field",
              "{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s"
              % keys,
            ),
          )
        elif facet["type"] == "nested":
          _f = {
            "field": facet["field"],
            "limit": int(facet["properties"].get("limit", 10)) + (1 if facet["widgetType"] == "text-facet-widget" else 0),
            "mincount": int(facet["properties"]["mincount"]),
            "sort": {"count": facet["properties"]["sort"]},
          }
          # (Removed a leftover debug `print facet` statement here.)

          if facet["properties"]["domain"].get("blockParent") or facet["properties"]["domain"].get("blockChildren"):
            _f["domain"] = {}
            if facet["properties"]["domain"].get("blockParent"):
              _f["domain"]["blockParent"] = " OR ".join(facet["properties"]["domain"]["blockParent"])
            if facet["properties"]["domain"].get("blockChildren"):
              _f["domain"]["blockChildren"] = " OR ".join(facet["properties"]["domain"]["blockChildren"])

          if "start" in facet["properties"] and not facet["properties"].get("type") == "field":
            _f.update(
              {
                "type": "range",
                "start": facet["properties"]["start"],
                "end": facet["properties"]["end"],
                "gap": facet["properties"]["gap"],
              }
            )
            if (
              timeFilter
              and timeFilter["time_field"] == facet["field"]
              and (
                facet["id"] not in timeFilter["time_filter_overrides"]
                or facet["widgetType"] != "bucket-widget"
              )
            ):
              _f.update(self._get_time_filter_query(timeFilter, facet))
          else:
            _f.update(
              {
                "type": "terms",
                "field": facet["field"],
                "excludeTags": facet["id"],
                "offset": 0,
                "numBuckets": True,
                "allBuckets": True,
                "prefix": "",
              }
            )
            if facet["properties"]["canRange"] and not facet["properties"]["isDate"]:
              del _f["mincount"]  # Numeric fields do not support

          if facet["properties"]["facets"]:
            self._n_facet_dimension(facet, _f, facet["properties"]["facets"], 1)
            if facet["widgetType"] == "text-facet-widget":
              _fname = _f["facet"].keys()[0]
              _f["sort"] = {_fname: facet["properties"]["sort"]}
              # domain = '-d2:NaN' # Solr 6.4

          json_facets[facet["id"]] = _f
        elif facet["type"] == "function":
          json_facets[facet["id"]] = self._get_aggregate_function(facet)
          json_facets["processEmpty"] = True
        elif facet["type"] == "pivot":
          if facet["properties"]["facets"] or facet["widgetType"] == "map-widget":
            fields = facet["field"]
            fields_limits = []
            for f in facet["properties"]["facets"]:
              fields_limits.append("f.%s.facet.limit=%s" % (f["field"], f["limit"]))
              fields_limits.append("f.%s.facet.mincount=%s" % (f["field"], f["mincount"]))
              fields += "," + f["field"]
            keys = {
              "id": "%(id)s" % facet,
              "key": "%(field)s-%(id)s" % facet,
              "field": facet["field"],
              "fields": fields,
              "limit": int(facet["properties"].get("limit", 10)),
              "mincount": int(facet["properties"]["mincount"]),
              "fields_limits": " ".join(fields_limits),
            }
            params += (
              (
                "facet.pivot",
                "{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s %(fields_limits)s}%(fields)s"
                % keys,
              ),
            )

      if json_facets:
        params += (("json.facet", json.dumps(json_facets)),)

    params += self._get_fq(collection, query)

    if collection["template"]["fieldsSelected"] and collection["template"]["isGridLayout"]:
      # NOTE(review): the conditional binds to the whole expression, so a falsy
      # idField yields set([]) and drops fieldsSelected too — looks suspicious,
      # kept as-is for compatibility.
      fields = set(
        collection["template"]["fieldsSelected"] + [collection["idField"]] if collection["idField"] else []
      )
      # Add field if needed
      if collection["template"]["leafletmap"].get("latitudeField"):
        fields.add(collection["template"]["leafletmap"]["latitudeField"])
      if collection["template"]["leafletmap"].get("longitudeField"):
        fields.add(collection["template"]["leafletmap"]["longitudeField"])
      if collection["template"]["leafletmap"].get("labelField"):
        fields.add(collection["template"]["leafletmap"]["labelField"])
      fl = urllib.unquote(utf_quoter(",".join(list(fields))))
    else:
      fl = "*"

    nested_fields = self._get_nested_fields(collection)
    if nested_fields:
      fl += urllib.unquote(utf_quoter(',[child parentFilter="%s"]' % " OR ".join(nested_fields)))

    params += (("fl", fl),)

    params += (("hl", "true"), ("hl.fl", "*"), ("hl.snippets", 5), ("hl.fragsize", 1000))

    if collection["template"]["fieldsSelected"]:
      fields = []
      for field in collection["template"]["fieldsSelected"]:
        attribute_field = filter(
          lambda attribute: field == attribute["name"], collection["template"]["fieldsAttributes"]
        )
        if attribute_field:
          if attribute_field[0]["sort"]["direction"]:
            fields.append("%s %s" % (field, attribute_field[0]["sort"]["direction"]))
      if fields:
        params += (("sort", ",".join(fields)),)

    response = self._root.get("%(collection)s/select" % solr_query, params)
    return self._get_json(response)

  def _n_facet_dimension(self, widget, _f, facets, dim):
    """Recursively translate one nested-facet dimension into JSON Facet API
    sub-facets; `dim` numbers the dimension for stable key names."""
    facet = facets[0]
    f_name = "dim_%02d:%s" % (dim, facet["field"])

    if facet["aggregate"]["function"] == "count":
      if "facet" not in _f:
        _f["facet"] = {f_name: {}}
      else:
        _f["facet"][f_name] = {}
      _f = _f["facet"]

      _f[f_name] = {
        "type": "terms",
        "field": "%(field)s" % facet,
        "limit": int(facet.get("limit", 10)),
        "mincount": int(facet["mincount"]),
        "numBuckets": True,
        "allBuckets": True,
        "prefix": "",
      }
      if widget["widgetType"] == "tree2-widget" and facets[-1]["aggregate"]["function"] != "count":
        _f["subcount"] = self._get_aggregate_function(facets[-1])

      if len(facets) > 1:  # Get n+1 dimension
        if facets[1]["aggregate"]["function"] == "count":
          self._n_facet_dimension(widget, _f[f_name], facets[1:], dim + 1)
        else:
          self._n_facet_dimension(widget, _f[f_name], facets[1:], dim)
    else:
      agg_function = self._get_aggregate_function(facet)
      _f["facet"] = {"agg_%02d_00:%s" % (dim, agg_function): agg_function}
      for i, _f_agg in enumerate(facets[1:], 1):
        if _f_agg["aggregate"]["function"] != "count":
          agg_function = self._get_aggregate_function(_f_agg)
          _f["facet"]["agg_%02d_%02d:%s" % (dim, i, agg_function)] = agg_function
        else:
          self._n_facet_dimension(widget, _f, facets[i:], dim + 1)  # Get n+1 dimension
          break

  def suggest(self, collection, query):
    """Run the suggester for `collection`; raises PopupException on REST errors."""
    try:
      params = self._get_params() + (
        ("suggest", "true"),
        ("suggest.build", "true"),
        ("suggest.q", query["q"]),
        ("wt", "json"),
      )
      if query.get("dictionary"):
        params += (("suggest.dictionary", query["dictionary"]),)
      response = self._root.get("%s/suggest" % collection, params)
      return self._get_json(response)
    except RestException as e:
      raise PopupException(e, title=_("Error while accessing Solr"))
class JobServerApi(object):
  """Thin REST client for a Livy job server (sessions and batches)."""

  def __init__(self, livy_url):
    self._url = posixpath.join(livy_url)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = SECURITY_ENABLED.get()
    self._thread_local = threading.local()

    if self.security_enabled:
      self._client.set_kerberos_auth()

  def __str__(self):
    return "JobServerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  @property
  def user(self):
    # Per-thread effective user, set via setuser().
    return self._thread_local.user

  def setuser(self, user):
    # Accept either a user object or a plain username string.
    self._thread_local.user = user.username if hasattr(user, 'username') else user

  @staticmethod
  def _log_params(startFrom, size):
    # Optional paging parameters shared by the session and batch log endpoints.
    opts = {}
    if startFrom is not None:
      opts['from'] = startFrom
    if size is not None:
      opts['size'] = size
    return opts

  def get_status(self):
    return self._root.get('sessions')

  def get_log(self, uuid, startFrom=None, size=None):
    resp = self._root.get('sessions/%s/log' % uuid, params=self._log_params(startFrom, size))
    return '\n'.join(resp['log'])

  def create_session(self, **properties):
    properties['proxyUser'] = self.user
    return self._root.post('sessions', data=json.dumps(properties), contenttype=_JSON_CONTENT_TYPE)

  def get_sessions(self):
    return self._root.get('sessions')

  def get_session(self, uuid):
    return self._root.get('sessions/%s' % uuid)

  def get_statements(self, uuid):
    return self._root.get('sessions/%s/statements' % uuid)

  def submit_statement(self, uuid, statement):
    return self._root.post('sessions/%s/statements' % uuid, data=json.dumps({'code': statement}), contenttype=_JSON_CONTENT_TYPE)

  def inspect(self, uuid, statement):
    return self._root.post('sessions/%s/inspect' % uuid, data=json.dumps({'code': statement}), contenttype=_JSON_CONTENT_TYPE)

  def fetch_data(self, session, statement):
    return self._root.get('sessions/%s/statements/%s' % (session, statement))

  def cancel(self, session):
    return self._root.post('sessions/%s/interrupt' % session)

  def close(self, uuid):
    return self._root.delete('sessions/%s' % uuid)

  def get_batches(self):
    return self._root.get('batches')

  def submit_batch(self, properties):
    properties['proxyUser'] = self.user
    return self._root.post('batches', data=json.dumps(properties), contenttype=_JSON_CONTENT_TYPE)

  def get_batch(self, uuid):
    return self._root.get('batches/%s' % uuid)

  def get_batch_status(self, uuid):
    return self._root.get('batches/%s/state' % uuid)['state']

  def get_batch_log(self, uuid, startFrom=None, size=None):
    resp = self._root.get('batches/%s/log' % uuid, params=self._log_params(startFrom, size))
    return '\n'.join(resp['log'])

  def close_batch(self, uuid):
    return self._root.delete('batches/%s' % uuid)
class JobServerApi(object):
  """Thin REST client for a Livy job server, with kerberos, CSRF header and
  TLS verification support."""

  def __init__(self, livy_url):
    self._url = posixpath.join(livy_url)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = SECURITY_ENABLED.get()
    self._csrf_enabled = CSRF_ENABLED.get()
    self._thread_local = threading.local()

    if self.security_enabled:
      self._client.set_kerberos_auth()

    if self.csrf_enabled:
      self._client.set_headers({'X-Requested-By' : 'hue'})

    self._client.set_verify(SSL_CERT_CA_VERIFY.get())

  def __str__(self):
    return "JobServerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  @property
  def csrf_enabled(self):
    return self._csrf_enabled

  @property
  def user(self):
    # Per-thread effective user, set via setuser().
    return self._thread_local.user

  def setuser(self, user):
    # Accept either a user object or a plain username string.
    self._thread_local.user = user.username if hasattr(user, 'username') else user

  @staticmethod
  def _log_params(startFrom, size):
    # Optional paging parameters shared by the session and batch log endpoints.
    opts = {}
    if startFrom is not None:
      opts['from'] = startFrom
    if size is not None:
      opts['size'] = size
    return opts

  def get_status(self):
    return self._root.get('sessions')

  def get_log(self, uuid, startFrom=None, size=None):
    resp = self._root.get('sessions/%s/log' % uuid, params=self._log_params(startFrom, size))
    return '\n'.join(resp['log'])

  def create_session(self, **properties):
    properties['proxyUser'] = self.user
    return self._root.post('sessions', data=json.dumps(properties), contenttype=_JSON_CONTENT_TYPE)

  def get_sessions(self):
    return self._root.get('sessions')

  def get_session(self, uuid):
    return self._root.get('sessions/%s' % uuid)

  def get_statements(self, uuid):
    return self._root.get('sessions/%s/statements' % uuid)

  def submit_statement(self, uuid, statement):
    return self._root.post('sessions/%s/statements' % uuid, data=json.dumps({'code': statement}), contenttype=_JSON_CONTENT_TYPE)

  def inspect(self, uuid, statement):
    return self._root.post('sessions/%s/inspect' % uuid, data=json.dumps({'code': statement}), contenttype=_JSON_CONTENT_TYPE)

  def fetch_data(self, session, statement):
    return self._root.get('sessions/%s/statements/%s' % (session, statement))

  def cancel(self, session):
    return self._root.post('sessions/%s/interrupt' % session)

  def close(self, uuid):
    return self._root.delete('sessions/%s' % uuid)

  def get_batches(self):
    return self._root.get('batches')

  def submit_batch(self, properties):
    properties['proxyUser'] = self.user
    return self._root.post('batches', data=json.dumps(properties), contenttype=_JSON_CONTENT_TYPE)

  def get_batch(self, uuid):
    return self._root.get('batches/%s' % uuid)

  def get_batch_status(self, uuid):
    return self._root.get('batches/%s/state' % uuid)['state']

  def get_batch_log(self, uuid, startFrom=None, size=None):
    resp = self._root.get('batches/%s/log' % uuid, params=self._log_params(startFrom, size))
    return '\n'.join(resp['log'])

  def close_batch(self, uuid):
    return self._root.delete('batches/%s' % uuid)
class SqoopClient(object):
  """REST client for the Sqoop2 server: connectors, links, jobs, submissions."""

  STATUS_GOOD = ('FINE', 'ACCEPTABLE')
  STATUS_BAD = ('UNACCEPTABLE', 'FAILURE_ON_SUBMIT')

  def __init__(self, url, username, language='en'):
    self._url = url
    self._client = HttpClient(self._url, logger=LOG)
    self._root = SqoopResource(self._client)
    self._language = language
    self._username = username
    # Evaluate the security flag once (was previously queried twice).
    self._security_enabled = has_sqoop_has_security()
    if self._security_enabled:
      self._client.set_kerberos_auth()

  def __str__(self):
    return "SqoopClient at %s with security %s" % (self._url, self._security_enabled)

  @property
  def url(self):
    return self._url

  @property
  def headers(self):
    return {
      'Accept': 'application/json',
      'Accept-Language': self._language,
      'sqoop-user-name': self._username
    }

  @staticmethod
  def _check_validation(resp):
    # Lame check that iterates to make sure we have an error
    # Server responds with: {'validation-result': [{},{}]} or {'validation-result': [{KEY: ERROR},{KEY: ERROR}]}
    for result in resp['validation-result']:
      if result:
        raise SqoopException.from_dicts(resp['validation-result'])

  def get_version(self):
    return self._root.get('version', headers=self.headers)

  def get_driver(self):
    resp_dict = self._root.get('%s/driver' % API_VERSION, headers=self.headers)
    return Driver.from_dict(resp_dict)

  def get_connectors(self):
    resp_dict = self._root.get('%s/connectors' % API_VERSION, headers=self.headers)
    return [Connector.from_dict(connector_dict) for connector_dict in resp_dict['connectors']]

  def get_connector(self, connector_id):
    resp_dict = self._root.get('%s/connector/%d/' % (API_VERSION, connector_id), headers=self.headers)
    if resp_dict['connector']:
      return Connector.from_dict(resp_dict['connector'])
    return None

  def get_links(self):
    resp_dict = self._root.get('%s/links' % API_VERSION, headers=self.headers)
    return [Link.from_dict(link_dict) for link_dict in resp_dict['links']]

  def get_link(self, link_id):
    resp_dict = self._root.get('%s/link/%d/' % (API_VERSION, link_id), headers=self.headers)
    if resp_dict['link']:
      return Link.from_dict(resp_dict['link'])
    return None

  def create_link(self, link):
    """Create `link` on the server, stamping creation/update dates (epoch ms)."""
    link.creation_date = int(round(time.time() * 1000))
    link.update_date = link.creation_date
    request_dict = {'link': link.to_dict()}
    resp = self._root.post('%s/link/' % API_VERSION, data=json.dumps(request_dict), headers=self.headers)
    self._check_validation(resp)
    link.id = resp['id']
    return link

  def update_link(self, link):
    if not link.link_config_values:
      link.link_config_values = self.get_connectors()[0].link_config
    link.updated = int(round(time.time() * 1000))
    request_dict = {'link': link.to_dict()}
    resp = self._root.put('%s/link/%d/' % (API_VERSION, link.id), data=json.dumps(request_dict), headers=self.headers)
    self._check_validation(resp)
    return link

  def delete_link(self, link):
    resp = self._root.delete('%s/link/%d/' % (API_VERSION, link.id), headers=self.headers)
    return None

  def get_jobs(self):
    resp_dict = self._root.get('%s/jobs' % API_VERSION, headers=self.headers)
    return [Job.from_dict(job_dict) for job_dict in resp_dict['jobs']]

  def get_job(self, job_id):
    resp_dict = self._root.get('%s/job/%d/' % (API_VERSION, job_id), headers=self.headers)
    if resp_dict['job']:
      return Job.from_dict(resp_dict['job'])
    return None

  def create_job(self, job):
    """Create `job`, filling missing FROM/TO/driver configs from the server."""
    if not job.from_config_values:
      job.from_config_values = self.get_connectors()[0].job_config['FROM']
    if not job.to_config_values:
      job.to_config_values = self.get_connectors()[0].job_config['TO']
    if not job.driver_config_values:
      job.driver_config_values = self.get_driver().job_config
    job.creation_date = int(round(time.time() * 1000))
    job.update_date = job.creation_date
    request_dict = {'job': job.to_dict()}
    resp = self._root.post('%s/job/' % API_VERSION, data=json.dumps(request_dict), headers=self.headers)
    # Unlike the link endpoints, success here is signalled by the presence of 'id'.
    if 'id' not in resp:
      raise SqoopException.from_dicts(resp['validation-result'])
    job.id = resp['id']
    return job

  def update_job(self, job):
    if not job.from_config_values:
      job.from_config_values = self.get_connectors()[0].job_config['FROM']
    if not job.to_config_values:
      job.to_config_values = self.get_connectors()[0].job_config['TO']
    if not job.driver_config_values:
      job.driver_config_values = self.get_driver().job_config
    job.updated = int(round(time.time() * 1000))
    request_dict = {'job': job.to_dict()}
    resp = self._root.put('%s/job/%d/' % (API_VERSION, job.id), data=json.dumps(request_dict), headers=self.headers)
    self._check_validation(resp)
    return job

  def delete_job(self, job):
    resp_dict = self._root.delete('%s/job/%s' % (API_VERSION, job.id), headers=self.headers)
    return None

  def get_job_status(self, job):
    resp_dict = self._root.get('%s/job/%d/status' % (API_VERSION, job.id), headers=self.headers)
    return Submission.from_dict(resp_dict['submission'])

  def start_job(self, job):
    """Start `job`; raises SqoopSubmissionException when the server reports a bad status."""
    resp_dict = self._root.put('%s/job/%d/start' % (API_VERSION, job.id), headers=self.headers)
    if resp_dict['submission']['status'] in SqoopClient.STATUS_BAD:
      raise SqoopSubmissionException.from_dict(resp_dict['submission'])
    return Submission.from_dict(resp_dict['submission'])

  def stop_job(self, job):
    resp_dict = self._root.put('%s/job/%d/stop' % (API_VERSION, job.id), headers=self.headers)
    return Submission.from_dict(resp_dict['submission'])

  def get_submissions(self):
    resp_dict = self._root.get('%s/submissions' % API_VERSION, headers=self.headers)
    return [Submission.from_dict(submission_dict) for submission_dict in resp_dict['submissions']]

  def set_user(self, user):
    self._user = user

  def set_language(self, language):
    self._language = language
class ImpalaDaemonApi(object):
  """REST client for the Impala daemon's debug web server (queries, profiles)."""

  def __init__(self, server_url):
    self._url = server_url
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = is_kerberos_enabled()
    self._webserver_spnego_enabled = is_webserver_spnego_enabled()
    self._thread_local = threading.local()

    # You can set username/password for Impala Web UI which overrides kerberos
    if DAEMON_API_USERNAME.get() is not None and DAEMON_API_PASSWORD.get() is not None:
      if DAEMON_API_AUTH_SCHEME.get().lower() == 'basic':
        self._client.set_basic_auth(DAEMON_API_USERNAME.get(), DAEMON_API_PASSWORD.get())
        LOG.info("Using username and password for basic authentication")
      else:
        self._client.set_digest_auth(DAEMON_API_USERNAME.get(), DAEMON_API_PASSWORD.get())
        LOG.info('Using username and password for digest authentication')
    elif self._webserver_spnego_enabled or self._security_enabled:
      self._client.set_kerberos_auth()
      LOG.info('Using kerberos principal for authentication')

  def __str__(self):
    return "ImpalaDaemonApi at %s" % self._url

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  @property
  def user(self):
    # Per-thread effective user, set via set_user().
    return self._thread_local.user

  def set_user(self, user):
    # Accept either a user object or a plain username string.
    if hasattr(user, 'username'):
      self._thread_local.user = user.username
    else:
      self._thread_local.user = user

  def _get_json_response(self, path, params, context):
    """GET `path` and decode a JSON string body.

    `context` names the endpoint in the error message so the messages of the
    original per-endpoint methods are preserved exactly.
    Raises ImpalaDaemonApiException when the body is not valid JSON.
    """
    resp = self._root.get(path, params=params)
    try:
      if isinstance(resp, basestring):
        return json.loads(resp)
      else:
        return resp
    except ValueError as e:
      raise ImpalaDaemonApiException('%s did not return valid JSON: %s' % (context, e))

  def get_queries(self):
    return self._get_json_response('queries', {'json': 'true'}, 'ImpalaDaemonApi')

  def get_query(self, query_id):
    return self._get_json_response('query_plan', {'query_id': query_id, 'json': 'true'}, 'ImpalaDaemonApi')

  def get_query_profile(self, query_id):
    return self._get_json_response('query_profile', {'query_id': query_id, 'json': 'true'}, 'ImpalaDaemonApi query_profile')

  def get_query_memory(self, query_id):
    return self._get_json_response('query_memory', {'query_id': query_id, 'json': 'true'}, 'ImpalaDaemonApi query_memory')

  def kill(self, query_id):
    return self._get_json_response('cancel_query', {'query_id': query_id, 'json': 'true'}, 'ImpalaDaemonApi kill')

  def get_query_backends(self, query_id):
    return self._get_json_response('query_backends', {'query_id': query_id, 'json': 'true'}, 'ImpalaDaemonApi query_backends')

  def get_query_finstances(self, query_id):
    return self._get_json_response('query_finstances', {'query_id': query_id, 'json': 'true'}, 'ImpalaDaemonApi query_finstances')

  def get_query_summary(self, query_id):
    return self._get_json_response('query_summary', {'query_id': query_id, 'json': 'true'}, 'ImpalaDaemonApi query_summary')

  def get_query_profile_encoded(self, query_id):
    # Raw (encoded) profile download: no JSON decoding.
    params = {'query_id': query_id}
    return self._root.get('query_profile_encoded', params=params)
class OozieApi(object):
  """Client for the Oozie REST web services API (per-instance user variant).

  Requests are issued on behalf of ``user`` through Oozie's ``doAs``
  impersonation parameter; when security is off, ``user.name`` is also sent
  as the authenticated proxy user.
  """

  # Filter keys accepted by the Oozie /jobs listing endpoint.
  VALID_JOB_FILTERS = ('name', 'user', 'group', 'status', 'startcreatedtime')
  # Filter keys accepted when fetching job logs.
  VALID_LOG_FILTERS = {'recent', 'limit', 'loglevel', 'text'}

  def __init__(self, oozie_url, user, security_enabled=False, api_version=API_VERSION, ssl_cert_ca_verify=True):
    self._url = posixpath.join(oozie_url, api_version)
    self._client = HttpClient(self._url, logger=LOG)
    if security_enabled:
      self._client.set_kerberos_auth()
    self._client.set_verify(ssl_cert_ca_verify)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    # To store username info: accept either a User-like object or a string.
    if hasattr(user, 'username'):
      self.user = user.username
    else:
      self.user = user
    self.api_version = api_version

  def __str__(self):
    return "OozieApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def _get_params(self):
    """Base query parameters (impersonation + timezone) for every request."""
    if self.security_enabled:
      return {
        'doAs': self.user,
        'timezone': TIME_ZONE.get()
      }
    return {
      'user.name': DEFAULT_USER,
      'doAs': self.user,
      'timezone': TIME_ZONE.get()
    }

  def _get_oozie_properties(self, properties=None):
    """Job configuration properties with ``user.name`` always present."""
    defaults = {
      'user.name': self.user,
    }
    if properties is not None:
      defaults.update(properties)
    return defaults

  def get_jobs(self, jobtype, offset=None, cnt=None, filters=None):
    """
    Get a list of Oozie jobs.

    Note that offset is 1-based. ``filters`` is a list of (key, value)
    pairs; keys must be in VALID_JOB_FILTERS: name, user, group, status.
    """
    params = self._get_params()
    if offset is not None:
      params['offset'] = str(offset)
    if cnt is not None:
      params['len'] = str(cnt)
    if filters is None:
      filters = []
    params['jobtype'] = jobtype

    filter_list = []
    for key, val in filters:
      if key not in OozieApi.VALID_JOB_FILTERS:
        raise ValueError('"%s" is not a valid filter for selecting jobs' % (key,))
      filter_list.append('%s=%s' % (key, val))
    params['filter'] = ';'.join(filter_list)

    # Send the request
    resp = self._root.get('jobs', params)
    if jobtype == 'wf':
      wf_list = WorkflowList(self, resp, filters=filters)
    elif jobtype == 'coord':
      wf_list = CoordinatorList(self, resp, filters=filters)
    else:
      wf_list = BundleList(self, resp, filters=filters)
    return wf_list

  def get_workflows(self, offset=None, cnt=None, filters=None):
    return self.get_jobs('wf', offset, cnt, filters)

  def get_coordinators(self, offset=None, cnt=None, filters=None):
    return self.get_jobs('coord', offset, cnt, filters)

  def get_bundles(self, offset=None, cnt=None, filters=None):
    return self.get_jobs('bundle', offset, cnt, filters)

  # TODO: make get_job accept any jobid
  def get_job(self, jobid):
    """
    get_job(jobid) -> Workflow
    """
    params = self._get_params()
    resp = self._root.get('job/%s' % (jobid,), params)
    wf = Workflow(self, resp)
    return wf

  def get_coordinator(self, jobid, offset=None, cnt=None, filters=None):
    params = self._get_params()
    if offset is not None:
      params['offset'] = str(offset)
    if cnt is not None:
      params['len'] = str(cnt)
    if filters is None:
      # BUG FIX: the default used to be ``{}``, but filters is iterated as
      # (key, value) pairs just like in get_jobs(), so it must be a list.
      filters = []
    params.update({'order': 'desc'})

    filter_list = []
    for key, val in filters:
      if key not in OozieApi.VALID_JOB_FILTERS:
        raise ValueError('"%s" is not a valid filter for selecting jobs' % (key,))
      filter_list.append('%s=%s' % (key, val))
    params['filter'] = ';'.join(filter_list)

    resp = self._root.get('job/%s' % (jobid,), params)
    return Coordinator(self, resp)

  def get_bundle(self, jobid):
    params = self._get_params()
    resp = self._root.get('job/%s' % (jobid,), params)
    return Bundle(self, resp)

  def get_job_definition(self, jobid):
    """
    get_job_definition(jobid) -> Definition (xml string)
    """
    params = self._get_params()
    params['show'] = 'definition'
    return self._root.get('job/%s' % (jobid,), params)

  def get_job_log(self, jobid, logfilter=None):
    """
    get_job_log(jobid) -> Log (xml string)

    ``logfilter`` is a list of (key, value) pairs with keys in
    VALID_LOG_FILTERS.
    """
    params = self._get_params()
    params['show'] = 'log'

    filter_list = []
    if logfilter is None:
      logfilter = []
    for key, val in logfilter:
      if key not in OozieApi.VALID_LOG_FILTERS:
        raise ValueError('"%s" is not a valid filter for job logs' % (key,))
      filter_list.append('%s=%s' % (key, val))
    params['logfilter'] = ';'.join(filter_list)
    return self._root.get('job/%s' % (jobid,), params)

  def get_job_status(self, jobid):
    params = self._get_params()
    params['show'] = 'status'
    xml = self._root.get('job/%s' % (jobid,), params)
    return xml

  def get_action(self, action_id):
    # The action id embeds the parent job type: C@ -> coordinator action,
    # B@ -> bundle action, otherwise a workflow action.
    if 'C@' in action_id:
      Klass = CoordinatorAction
    elif 'B@' in action_id:
      Klass = BundleAction
    else:
      Klass = WorkflowAction
    params = self._get_params()
    resp = self._root.get('job/%s' % (action_id,), params)
    return Klass(resp)

  def job_control(self, jobid, action, properties=None, parameters=None):
    """
    job_control(jobid, action) -> None

    Raise RestException on error.
    """
    if action not in ('start', 'suspend', 'resume', 'kill', 'rerun', 'coord-rerun', 'bundle-rerun', 'change', 'ignore'):
      msg = 'Invalid oozie job action: %s' % (action,)
      LOG.error(msg)
      raise ValueError(msg)
    properties = self._get_oozie_properties(properties)
    params = self._get_params()
    params['action'] = action
    if parameters is not None:
      params.update(parameters)

    return self._root.put('job/%s' % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def submit_workflow(self, application_path, properties=None):
    """
    submit_workflow(application_path, properties=None) -> jobid

    Raise RestException on error.
    """
    defaults = {
      'oozie.wf.application.path': application_path,
      'user.name': self.user,
    }
    if properties is not None:
      defaults.update(properties)
    properties = defaults

    return self.submit_job(properties)

  # Is name actually submit_coord?
  def submit_job(self, properties=None):
    """
    submit_job(properties=None, id=None) -> jobid

    Raise RestException on error.
    """
    defaults = {
      'user.name': self.user,
    }
    if properties is not None:
      defaults.update(properties)
    properties = defaults

    params = self._get_params()
    resp = self._root.post('jobs', params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)
    return resp['id']

  def dryrun(self, properties=None):
    defaults = {
      'user.name': self.user,
    }
    if properties is not None:
      defaults.update(properties)
    properties = defaults

    params = self._get_params()
    params['action'] = 'dryrun'
    return self._root.post('jobs', params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def rerun(self, jobid, properties=None, params=None):
    properties = self._get_oozie_properties(properties)
    if params is None:
      params = self._get_params()
    else:
      # BUG FIX: previously ``self._get_params().update(params)`` merged into
      # a throwaway dict whose result was discarded, so caller-supplied params
      # were sent without the doAs/timezone base parameters. Merge for real,
      # letting the caller's entries win.
      merged = self._get_params()
      merged.update(params)
      params = merged
    params['action'] = 'rerun'

    return self._root.put('job/%s' % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def get_build_version(self):
    """
    get_build_version() -> Build version (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/build-version', params)
    return resp

  def get_instrumentation(self):
    params = self._get_params()
    resp = self._root.get('admin/instrumentation', params)
    return resp

  def get_metrics(self):
    params = self._get_params()
    resp = self._root.get('admin/metrics', params)
    return resp

  def get_configuration(self):
    """
    get_configuration() -> Oozie config (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/configuration', params)
    return resp

  def get_oozie_status(self):
    """
    get_oozie_status() -> Oozie status (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/status', params)
    return resp

  def get_oozie_slas(self, **kwargs):
    """
    filter=
      app_name=my-sla-app
      id=0000002-131206135002457-oozie-oozi-W
      nominal_start=2013-06-18T00:01Z
      nominal_end=2013-06-23T00:01Z
    """
    params = self._get_params()
    params['filter'] = ';'.join(['%s=%s' % (key, val) for key, val in kwargs.iteritems()])
    resp = self._root.get('sla', params)
    return resp['slaSummaryList']
class OozieApi(object):
  """Client for the Oozie REST web services API (thread-local user variant).

  The acting user is stored per-thread via setuser(), so one shared
  instance can serve concurrent requests under different identities.
  """

  # Filter keys accepted by the Oozie /jobs listing endpoint.
  VALID_JOB_FILTERS = ("name", "user", "group", "status")

  def __init__(self, oozie_url, security_enabled=False, api_version=API_VERSION):
    self._url = posixpath.join(oozie_url, api_version)
    self._client = HttpClient(self._url, logger=LOG)
    if security_enabled:
      self._client.set_kerberos_auth()
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    # To store username info
    self._thread_local = threading.local()
    self.api_version = api_version

  def __str__(self):
    return "OozieApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  @property
  def user(self):
    # NOTE(review): raises AttributeError if setuser() has not been called
    # on the current thread yet — callers are expected to set a user first.
    return self._thread_local.user

  def setuser(self, user):
    if hasattr(user, "username"):
      self._thread_local.user = user.username
    else:
      self._thread_local.user = user

  def _get_params(self):
    """Base query parameters (impersonation + timezone) for every request."""
    if self.security_enabled:
      return {"doAs": self.user, "timezone": TIME_ZONE.get()}
    return {"user.name": DEFAULT_USER, "doAs": self.user, "timezone": TIME_ZONE.get()}

  def _get_oozie_properties(self, properties=None):
    """Job configuration properties with ``user.name`` always present."""
    defaults = {"user.name": self.user}
    if properties is not None:
      defaults.update(properties)
    return defaults

  def get_jobs(self, jobtype, offset=None, cnt=None, **kwargs):
    """
    Get a list of Oozie jobs. jobtype is 'wf', 'coord'

    Note that offset is 1-based. kwargs is used for filtering and may be
    one of VALID_FILTERS: name, user, group, status
    """
    params = self._get_params()
    if offset is not None:
      params["offset"] = str(offset)
    if cnt is not None:
      params["len"] = str(cnt)
    params["jobtype"] = jobtype

    filter_list = []
    for key, val in kwargs.iteritems():
      if key not in OozieApi.VALID_JOB_FILTERS:
        raise ValueError('"%s" is not a valid filter for selecting jobs' % (key,))
      filter_list.append("%s=%s" % (key, val))
    params["filter"] = ";".join(filter_list)

    # Send the request
    resp = self._root.get("jobs", params)
    if jobtype == "wf":
      wf_list = WorkflowList(self, resp, filters=kwargs)
    elif jobtype == "coord":
      wf_list = CoordinatorList(self, resp, filters=kwargs)
    else:
      wf_list = BundleList(self, resp, filters=kwargs)
    return wf_list

  def get_workflows(self, offset=None, cnt=None, **kwargs):
    return self.get_jobs("wf", offset, cnt, **kwargs)

  def get_coordinators(self, offset=None, cnt=None, **kwargs):
    return self.get_jobs("coord", offset, cnt, **kwargs)

  def get_bundles(self, offset=None, cnt=None, **kwargs):
    return self.get_jobs("bundle", offset, cnt, **kwargs)

  # TODO: make get_job accept any jobid
  def get_job(self, jobid):
    """
    get_job(jobid) -> Workflow
    """
    params = self._get_params()
    resp = self._root.get("job/%s" % (jobid,), params)
    wf = Workflow(self, resp)
    return wf

  def get_coordinator(self, jobid):
    params = self._get_params()
    # len=-1 asks Oozie for all coordinator actions in one response.
    params.update({"len": -1})
    resp = self._root.get("job/%s" % (jobid,), params)
    return Coordinator(self, resp)

  def get_bundle(self, jobid):
    params = self._get_params()
    resp = self._root.get("job/%s" % (jobid,), params)
    return Bundle(self, resp)

  def get_job_definition(self, jobid):
    """
    get_job_definition(jobid) -> Definition (xml string)
    """
    params = self._get_params()
    params["show"] = "definition"
    xml = self._root.get("job/%s" % (jobid,), params)
    return xml

  def get_job_log(self, jobid):
    """
    get_job_log(jobid) -> Log (xml string)
    """
    params = self._get_params()
    params["show"] = "log"
    xml = self._root.get("job/%s" % (jobid,), params)
    return xml

  def get_action(self, action_id):
    # The action id embeds the parent job type: C@ -> coordinator action,
    # B@ -> bundle action, otherwise a workflow action.
    if "C@" in action_id:
      Klass = CoordinatorAction
    elif "B@" in action_id:
      Klass = BundleAction
    else:
      Klass = WorkflowAction
    params = self._get_params()
    resp = self._root.get("job/%s" % (action_id,), params)
    return Klass(resp)

  def job_control(self, jobid, action, properties=None, parameters=None):
    """
    job_control(jobid, action) -> None

    Raise RestException on error.
    """
    if action not in ("start", "suspend", "resume", "kill", "rerun", "coord-rerun", "bundle-rerun"):
      msg = "Invalid oozie job action: %s" % (action,)
      LOG.error(msg)
      raise ValueError(msg)
    properties = self._get_oozie_properties(properties)
    params = self._get_params()
    params["action"] = action
    if parameters is not None:
      params.update(parameters)

    return self._root.put("job/%s" % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def submit_workflow(self, application_path, properties=None):
    """
    submit_workflow(application_path, properties=None) -> jobid

    Raise RestException on error.
    """
    defaults = {"oozie.wf.application.path": application_path, "user.name": self.user}
    if properties is not None:
      defaults.update(properties)
    properties = defaults

    return self.submit_job(properties)

  # Is name actually submit_coord?
  def submit_job(self, properties=None):
    """
    submit_job(properties=None, id=None) -> jobid

    Raise RestException on error.
    """
    defaults = {"user.name": self.user}
    if properties is not None:
      defaults.update(properties)
    properties = defaults

    params = self._get_params()
    resp = self._root.post("jobs", params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)
    return resp["id"]

  def rerun(self, jobid, properties=None, params=None):
    properties = self._get_oozie_properties(properties)
    if params is None:
      params = self._get_params()
    else:
      # BUG FIX: previously ``self._get_params().update(params)`` merged into
      # a throwaway dict whose result was discarded, so caller-supplied params
      # were sent without the doAs/timezone base parameters. Merge for real,
      # letting the caller's entries win.
      merged = self._get_params()
      merged.update(params)
      params = merged
    params["action"] = "rerun"

    return self._root.put("job/%s" % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def get_build_version(self):
    """
    get_build_version() -> Build version (dictionary)
    """
    params = self._get_params()
    resp = self._root.get("admin/build-version", params)
    return resp

  def get_instrumentation(self):
    params = self._get_params()
    resp = self._root.get("admin/instrumentation", params)
    return resp

  def get_configuration(self):
    """
    get_configuration() -> Oozie config (dictionary)
    """
    params = self._get_params()
    resp = self._root.get("admin/configuration", params)
    return resp

  def get_oozie_status(self):
    """
    get_oozie_status() -> Oozie status (dictionary)
    """
    params = self._get_params()
    resp = self._root.get("admin/status", params)
    return resp

  def get_oozie_slas(self, **kwargs):
    """
    filter=
      app_name=my-sla-app
      id=0000002-131206135002457-oozie-oozi-W
      nominal_start=2013-06-18T00:01Z
      nominal_end=2013-06-23T00:01Z
    """
    params = self._get_params()
    params["filter"] = ";".join(["%s=%s" % (key, val) for key, val in kwargs.iteritems()])
    resp = self._root.get("sla", params)
    return resp["slaSummaryList"]
class MapreduceApi(object):
  """Client for the MapReduce application REST API, reached through the
  ResourceManager web proxy (``<rm>/proxy/<app_id>/ws/...``).
  """

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(oozie_url, 'proxy')
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    if self._security_enabled:
      self._client.set_kerberos_auth()
    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "MapreduceApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  def _ids(self, job_id):
    """Return (application_id, job_id) no matter which form was passed.

    BUG FIX / generalization: previously only task_counters() and
    task_attempt() tolerated being handed an ``application_...`` id; the
    other methods built a malformed URL. Both replacements are no-ops when
    the id is already in the right form, so this is backward compatible.
    """
    return job_id.replace('job', 'application'), job_id.replace('application', 'job')

  def job(self, user, job_id):
    # ``user`` is unused but kept for interface compatibility with callers.
    app_id, job_id = self._ids(job_id)
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s' % {
      'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION
    }, headers={'Accept': _JSON_CONTENT_TYPE})

  def counters(self, job_id):
    app_id, job_id = self._ids(job_id)
    response = self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/counters' % {
      'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION
    }, headers={'Accept': _JSON_CONTENT_TYPE})
    # If it hits the job history server, it will return HTML.
    # Simply return None in this case because there isn't much data there.
    if isinstance(response, basestring):
      return None
    else:
      return response

  def tasks(self, job_id):
    app_id, job_id = self._ids(job_id)
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks' % {
      'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION
    }, headers={'Accept': _JSON_CONTENT_TYPE})

  def job_attempts(self, job_id):
    app_id, job_id = self._ids(job_id)
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/jobattempts' % {
      'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION
    }, headers={'Accept': _JSON_CONTENT_TYPE})

  def conf(self, job_id):
    app_id, job_id = self._ids(job_id)
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/conf' % {
      'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION
    }, headers={'Accept': _JSON_CONTENT_TYPE})

  def task(self, job_id, task_id):
    app_id, job_id = self._ids(job_id)
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s' % {
      'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'version': _API_VERSION
    }, headers={'Accept': _JSON_CONTENT_TYPE})

  def task_counters(self, job_id, task_id):
    app_id, job_id = self._ids(job_id)
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/counters' % {
      'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'version': _API_VERSION
    }, headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempts(self, job_id, task_id):
    app_id, job_id = self._ids(job_id)
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts' % {
      'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'version': _API_VERSION
    }, headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempt(self, job_id, task_id, attempt_id):
    app_id, job_id = self._ids(job_id)
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s' % {
      'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id, 'version': _API_VERSION
    }, headers={'Accept': _JSON_CONTENT_TYPE})

  def kill(self, job_id):
    app_id = job_id.replace('job', 'application')
    get_resource_manager().kill(app_id)  # We need to call the RM
class OozieApi(object):
  """Minimal client for the Oozie REST web services API (legacy variant).

  The acting user is stored per-thread via setuser(); DEFAULT_USER is used
  when no user has been set on the current thread.
  """

  # Filter keys accepted by the Oozie /jobs listing endpoint.
  VALID_JOB_FILTERS = ("name", "user", "group", "status")

  def __init__(self, oozie_url, security_enabled=False):
    self._url = posixpath.join(oozie_url, API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    if security_enabled:
      self._client.set_kerberos_auth()
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    # To store user info
    self._thread_local = threading.local()

  def __str__(self):
    return "OozieApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  @property
  def user(self):
    try:
      return self._thread_local.user
    except AttributeError:
      # No user set on this thread yet; fall back to the default.
      return DEFAULT_USER

  def setuser(self, user):
    """Return the previous user"""
    prev = self.user
    self._thread_local.user = user
    return prev

  def _get_params(self):
    """Base query parameters (impersonation) for every request."""
    if self.security_enabled:
      return {"doAs": self.user}
    return {"user.name": DEFAULT_USER, "doAs": self.user}

  def get_jobs(self, offset=None, cnt=None, **kwargs):
    """
    get_jobs(offset=None, cnt=None, **kwargs) -> WorkflowList

    Note that offset is 1-based. kwargs is used for filtering and may be
    one of VALID_FILTERS: name, user, group, status
    """
    params = self._get_params()
    if offset is not None:
      params["offset"] = str(offset)
    if cnt is not None:
      params["len"] = str(cnt)

    filter_list = []
    # BUG FIX: this used to be ``for key, val in kwargs:``, which iterates
    # bare dict keys and raises on unpacking as soon as any filter is passed.
    # Iterate the items, as the other OozieApi variants do.
    for key, val in kwargs.iteritems():
      if key not in OozieApi.VALID_JOB_FILTERS:
        raise ValueError('"%s" is not a valid filter for selecting jobs' % (key,))
      filter_list.append("%s=%s" % (key, val))
    params["filter"] = ";".join(filter_list)

    # Send the request
    resp = self._root.get("jobs", params)
    wf_list = WorkflowList(self, resp, filters=kwargs)
    return wf_list

  def get_job(self, jobid):
    """
    get_job(jobid) -> Workflow
    """
    params = self._get_params()
    resp = self._root.get("job/%s" % (jobid,), params)
    wf = Workflow(self, resp)
    return wf

  def get_job_definition(self, jobid):
    """
    get_job_definition(jobid) -> Definition (xml string)
    """
    params = self._get_params()
    params["show"] = "definition"
    xml = self._root.get("job/%s" % (jobid,), params)
    return xml

  def get_job_log(self, jobid):
    """
    get_job_log(jobid) -> Log (xml string)
    """
    params = self._get_params()
    params["show"] = "log"
    xml = self._root.get("job/%s" % (jobid,), params)
    return xml

  def job_control(self, jobid, action):
    """
    job_control(jobid, action) -> None

    Raise RestException on error.
    """
    if action not in ("start", "suspend", "resume", "kill"):
      msg = "Invalid oozie job action: %s" % (action,)
      LOG.error(msg)
      raise ValueError(msg)
    params = self._get_params()
    params["action"] = action
    self._root.put("job/%s" % (jobid,), params)

  def submit_workflow(self, application_path, properties=None):
    """
    submit_workflow(application_path, username, properties=None) -> jobid

    Submit a job to Oozie. May raise PopupException.
    """
    defaults = {"oozie.wf.application.path": application_path, "user.name": self.user}
    if properties is not None:
      defaults.update(properties)
    properties = defaults

    params = self._get_params()
    resp = self._root.post("jobs", params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)
    return resp["id"]

  def get_build_version(self):
    """
    get_build_version() -> Build version (dictionary)
    """
    params = self._get_params()
    resp = self._root.get("admin/build-version", params)
    return resp

  def get_instrumentation(self):
    """
    get_instrumentation() -> Oozie instrumentation (dictionary)
    """
    params = self._get_params()
    resp = self._root.get("admin/instrumentation", params)
    return resp

  def get_configuration(self):
    """
    get_configuration() -> Oozie config (dictionary)
    """
    params = self._get_params()
    resp = self._root.get("admin/configuration", params)
    return resp

  def get_oozie_status(self):
    """
    get_oozie_status() -> Oozie status (dictionary)
    """
    params = self._get_params()
    resp = self._root.get("admin/status", params)
    return resp
class ManagerApi(object):
  """
  https://cloudera.github.io/cm_api/
  """

  def __init__(self, user=None, security_enabled=False, ssl_cert_ca_verify=False):
    self._api_url = '%s/%s' % (MANAGER.API_URL.get().strip('/'), VERSION)
    self._username = get_navigator_auth_username()
    self._password = get_navigator_auth_password()
    self.user = user
    self._client = HttpClient(self._api_url, logger=LOG)
    if security_enabled:
      self._client.set_kerberos_auth()
    else:
      self._client.set_basic_auth(self._username, self._password)
    self._client.set_verify(ssl_cert_ca_verify)
    self._root = Resource(self._client)

  def has_service(self, service_name, cluster_name=None):
    """Return True if ``service_name`` is an available service type on the cluster."""
    cluster = self._get_cluster(cluster_name)
    try:
      services = self._root.get('clusters/%(cluster_name)s/serviceTypes' % {
        'cluster_name': cluster['name'],
        'service_name': service_name
      })['items']
      return service_name in services
    except RestException as e:
      raise ManagerApiException(e)

  def get_spark_history_server_configs(self, cluster_name=None):
    """Return (host_id, configs) of the Spark History Server role, or (None, None)."""
    service_name = "SPARK_ON_YARN"
    shs_role_type = "SPARK_YARN_HISTORY_SERVER"

    try:
      cluster = self._get_cluster(cluster_name)
      services = self._root.get('clusters/%(cluster_name)s/services' % {
        'cluster_name': cluster['name'],
        'service_name': service_name
      })['items']

      service_display_names = [service['displayName'] for service in services if service['type'] == service_name]

      if service_display_names:
        spark_service_display_name = service_display_names[0]

        servers = self._root.get('clusters/%(cluster_name)s/services/%(spark_service_display_name)s/roles' % {
          'cluster_name': cluster['name'],
          'spark_service_display_name': spark_service_display_name
        })['items']

        shs_server_names = [server['name'] for server in servers if server['type'] == shs_role_type]
        shs_server_name = shs_server_names[0] if shs_server_names else None
        shs_server_hostRef = [server['hostRef'] for server in servers if server['type'] == shs_role_type]
        shs_server_hostId = shs_server_hostRef[0]['hostId'] if shs_server_hostRef else None

        if shs_server_name and shs_server_hostId:
          shs_server_configs = self._root.get('clusters/%(cluster_name)s/services/%(spark_service_display_name)s/roles/%(shs_server_name)s/config' % {
            'cluster_name': cluster['name'],
            'spark_service_display_name': spark_service_display_name,
            'shs_server_name': shs_server_name
          }, params={'view': 'full'})['items']
          return shs_server_hostId, shs_server_configs
    except Exception as e:
      LOG.warning("Check Spark History Server via ManagerApi: %s" % e)

    return None, None

  def get_spark_history_server_url(self, cluster_name=None):
    """Assemble the SHS web UI URL from its role configuration, or None."""
    shs_server_hostId, shs_server_configs = self.get_spark_history_server_configs(cluster_name=cluster_name)

    if shs_server_hostId and shs_server_configs:
      shs_ui_port = None
      shs_ssl_port = None
      shs_ssl_enabled = None
      for config in shs_server_configs:
        if 'relatedName' in config and 'default' in config:
          if config['relatedName'] == 'spark.history.ui.port':
            shs_ui_port = config['default']
          if config['relatedName'] == 'spark.ssl.historyServer.port':
            shs_ssl_port = config['default']
          if config['relatedName'] == 'spark.ssl.historyServer.enabled':
            shs_ssl_enabled = config['default']
      shs_ui_host = self._root.get('hosts/%(hostId)s' % {'hostId': shs_server_hostId})
      shs_ui_hostname = shs_ui_host['hostname'] if shs_ui_host else None

      return self.assemble_shs_url(shs_ui_hostname, shs_ui_port, shs_ssl_port, shs_ssl_enabled)

    return None

  def get_spark_history_server_security_enabled(self, cluster_name=None):
    """Return True if the SHS role has SPNEGO (kerberos web auth) enabled."""
    shs_server_hostId, shs_server_configs = self.get_spark_history_server_configs(cluster_name=cluster_name)

    if shs_server_configs:
      # BUG FIX: shs_security_enabled used to be assigned only inside the
      # loop, raising NameError when no config entry carried the
      # 'history_server_spnego_enabled' key. Initialize it first.
      shs_security_enabled = None
      for config in shs_server_configs:
        if 'relatedName' in config and 'default' in config and config['relatedName'] == 'history_server_spnego_enabled':
          shs_security_enabled = config['default']
      return shs_security_enabled and shs_security_enabled == 'true'

    return False

  def assemble_shs_url(self, shs_ui_hostname, shs_ui_port=None, shs_ssl_port=None, shs_ssl_enabled=None):
    """Build the SHS URL; requires all four config values to be present."""
    if not shs_ui_hostname or not shs_ui_port or not shs_ssl_port or not shs_ssl_enabled:
      LOG.warning("Spark conf not found!")
      return None

    protocol = 'https' if shs_ssl_enabled.lower() == 'true' else 'http'
    shs_url = '%(protocol)s://%(hostname)s:%(port)s' % {
      'protocol': protocol,
      'hostname': shs_ui_hostname,
      'port': shs_ssl_port if shs_ssl_enabled.lower() == 'true' else shs_ui_port,
    }
    return shs_url

  def tools_echo(self):
    try:
      params = (('message', 'hello'),)
      LOG.info(params)
      return self._root.get('tools/echo', params=params)
    except RestException as e:
      raise ManagerApiException(e)

  def get_kafka_brokers(self, cluster_name=None):
    """Return the broker list as a comma-separated 'host:9092' string."""
    try:
      hosts = self._get_hosts('KAFKA', 'KAFKA_BROKER', cluster_name=cluster_name)
      brokers_hosts = [host['hostname'] + ':9092' for host in hosts]
      return ','.join(brokers_hosts)
    except RestException as e:
      raise ManagerApiException(e)

  def get_kudu_master(self, cluster_name=None):
    """Return the hostname of the first KUDU_MASTER role."""
    try:
      cluster = self._get_cluster(cluster_name)
      services = self._root.get('clusters/%(name)s/services' % cluster)['items']
      service = [service for service in services if service['type'] == 'KUDU'][0]
      master = self._get_roles(cluster['name'], service['name'], 'KUDU_MASTER')[0]
      master_host = self._root.get('hosts/%(hostId)s' % master['hostRef'])
      return master_host['hostname']
    except RestException as e:
      raise ManagerApiException(e)

  def get_kafka_topics(self, broker_host):
    try:
      client = HttpClient('http://%s:24042' % broker_host, logger=LOG)
      root = Resource(client)
      return root.get('/api/topics')
    except RestException as e:
      raise ManagerApiException(e)

  def update_flume_config(self, cluster_name, config_name, config_value):
    service = 'FLUME-1'
    cluster = self._get_cluster(cluster_name)
    roleConfigGroup = [role['roleConfigGroupRef']['roleConfigGroupName'] for role in self._get_roles(cluster['name'], service, 'AGENT')]
    data = {
      u'items': [{
        # str.replace is used instead of %-formatting because the URL
        # contains literal %20 escapes that would confuse the % operator.
        u'url': u'/api/v8/clusters/%(cluster_name)s/services/%(service)s/roleConfigGroups/%(roleConfigGroups)s/config?message=Updated%20service%20and%20role%20type%20configurations.'.replace(
          '%(cluster_name)s', urllib_quote(cluster['name'])
        ).replace(
          '%(service)s', service
        ).replace(
          '%(roleConfigGroups)s', roleConfigGroup[0]
        ),
        u'body': {
          u'items': [{
            u'name': config_name,
            u'value': config_value
          }]
        },
        u'contentType': u'application/json',
        u'method': u'PUT'
      }]
    }
    return self.batch(items=data)

  def get_flume_agents(self, cluster_name=None):
    return [host['hostname'] for host in self._get_hosts('FLUME', 'AGENT', cluster_name=cluster_name)]

  def _get_hosts(self, service_name, role_name, cluster_name=None):
    """Return the host records running ``role_name`` of ``service_name``."""
    try:
      cluster = self._get_cluster(cluster_name)
      services = self._root.get('clusters/%(name)s/services' % cluster)['items']
      service = [service for service in services if service['type'] == service_name][0]
      hosts = self._get_roles(cluster['name'], service['name'], role_name)
      hosts_ids = [host['hostRef']['hostId'] for host in hosts]

      hosts = self._root.get('hosts')['items']
      return [host for host in hosts if host['hostId'] in hosts_ids]
    except RestException as e:
      raise ManagerApiException(e)

  def refresh_flume(self, cluster_name, restart=False):
    service = 'FLUME-1'
    cluster = self._get_cluster(cluster_name)
    roles = [role['name'] for role in self._get_roles(cluster['name'], service, 'AGENT')]

    if restart:
      return self.restart_services(cluster['name'], service, roles)
    else:
      return self.refresh_configs(cluster['name'], service, roles)

  def refresh_configs(self, cluster_name, service=None, roles=None):
    try:
      if service is None:
        return self._root.post('clusters/%(cluster_name)s/commands/refresh' % {'cluster_name': cluster_name}, contenttype="application/json")
      elif roles is None:
        return self._root.post('clusters/%(cluster_name)s/services/%(service)s/roleCommands/refresh' % {
          'cluster_name': cluster_name,
          'service': service
        }, contenttype="application/json")
      else:
        return self._root.post('clusters/%(cluster_name)s/services/%(service)s/roleCommands/refresh' % {
          'cluster_name': cluster_name,
          'service': service
        }, data=json.dumps({"items": roles}), contenttype="application/json")
    except RestException as e:
      raise ManagerApiException(e)

  def restart_services(self, cluster_name, service=None, roles=None):
    try:
      if service is None:
        return self._root.post('clusters/%(cluster_name)s/commands/restart' % {'cluster_name': cluster_name}, contenttype="application/json")
      elif roles is None:
        return self._root.post('clusters/%(cluster_name)s/services/%(service)s/roleCommands/restart' % {
          'cluster_name': cluster_name,
          'service': service
        }, contenttype="application/json")
      else:
        return self._root.post('clusters/%(cluster_name)s/services/%(service)s/roleCommands/restart' % {
          'cluster_name': cluster_name,
          'service': service
        }, data=json.dumps({"items": roles}), contenttype="application/json")
    except RestException as e:
      raise ManagerApiException(e)

  def batch(self, items):
    try:
      return self._root.post('batch', data=json.dumps(items), contenttype='application/json')
    except RestException as e:
      raise ManagerApiException(e)

  def _get_cluster(self, cluster_name=None):
    """Return the named cluster record, or the first cluster when unnamed."""
    clusters = self._root.get('clusters/')['items']

    if cluster_name is not None:
      cluster = [cluster for cluster in clusters if cluster['name'] == cluster_name][0]
    else:
      cluster = clusters[0]

    return cluster

  def _get_roles(self, cluster_name, service_name, role_type):
    roles = self._root.get('clusters/%(cluster_name)s/services/%(service_name)s/roles' % {
      'cluster_name': cluster_name,
      'service_name': service_name
    })['items']
    return [role for role in roles if role['type'] == role_type]

  def get_impalad_config(self, key=None, impalad_host=None, cluster_name=None):
    """Look up one IMPALAD role config value for the daemon on ``impalad_host``."""
    if not key or not impalad_host:
      return None

    service_name = "IMPALA"
    role_type = 'IMPALAD'

    try:
      cluster = self._get_cluster(cluster_name)
      services = self._root.get('clusters/%(cluster_name)s/services' % {
        'cluster_name': cluster['name'],
        'service_name': service_name
      })['items']

      service_display_names = [service['displayName'] for service in services if service['type'] == service_name]

      hosts = self._root.get('hosts')['items']
      impalad_hostIds = [host['hostId'] for host in hosts if host['hostname'] == impalad_host]

      if impalad_hostIds and service_display_names:
        impalad_hostId = impalad_hostIds[0]
        impala_service_display_name = service_display_names[0]

        # NOTE: the format keys below are named after the Spark variant this
        # was adapted from; the values are Impala's. Kept for byte-identical URLs.
        servers = self._root.get('clusters/%(cluster_name)s/services/%(spark_service_display_name)s/roles' % {
          'cluster_name': cluster['name'],
          'spark_service_display_name': impala_service_display_name
        })['items']

        impalad_server_names = [
          server['name'] for server in servers
          if server['type'] == role_type and server['hostRef']['hostId'] == impalad_hostId
        ]
        impalad_server_name = impalad_server_names[0] if impalad_server_names else None

        if impalad_server_name:
          server_configs = self._root.get('clusters/%(cluster_name)s/services/%(spark_service_display_name)s/roles/%(shs_server_name)s/config' % {
            'cluster_name': cluster['name'],
            'spark_service_display_name': impala_service_display_name,
            'shs_server_name': impalad_server_name
          }, params={'view': 'full'})['items']

          for config in server_configs:
            if 'relatedName' in config and 'value' in config:
              if config['relatedName'] == key:
                return config['value']
    except Exception as e:
      LOG.warning("Get Impala Daemon API configurations via ManangerAPI: %s" % e)

    return None
class SolrApi(object):
  """
  Dashboard-facing Solr client.

  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """
  def __init__(self, solr_url, user, security_enabled=SECURITY_ENABLED.get(), ssl_cert_ca_verify=SSL_CERT_CA_VERIFY.get()):
    self._url = solr_url
    self._user = user
    self._client = HttpClient(self._url, logger=LOG)
    self.security_enabled = security_enabled

    if self.security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

    self._root = resource.Resource(self._client)

    # The Kerberos handshake requires two requests in order to authenticate,
    # but if our first request is a PUT/POST, it might flat-out reject the
    # first request if the body is too large. So, connect here in order to get
    # a cookie so future PUT/POSTs will be pre-authenticated.
    if self.security_enabled:
      self._root.invoke('HEAD', '/')

  def _get_params(self):
    # Impersonation parameters appended to every request.
    if self.security_enabled:
      return (('doAs', self._user),)
    return (('user.name', DEFAULT_USER), ('doAs', self._user),)

  def _get_q(self, query):
    """Build the main 'q' parameter by OR-ing the dashboard's query strings."""
    q_template = '(%s)' if len(query['qs']) >= 2 else '%s'
    # NOTE(review): joined with bare 'OR' (no surrounding spaces); each side is
    # parenthesized when there are >= 2 queries, so Solr still parses it.
    return 'OR'.join([q_template % (q['q'] or EMPTY_QUERY.get()) for q in query['qs']]).encode('utf-8')

  def _get_aggregate_function(self, facet):
    """Translate a facet aggregate spec into a Solr JSON facet function string."""
    props = {
        'field': facet['field'],
        'aggregate': facet['properties']['aggregate'] if 'properties' in facet else facet['aggregate']
    }

    if props['aggregate'] == 'median':
      # Solr has no median(); the 50th percentile is equivalent.
      return 'percentile(%(field)s,50)' % props
    else:
      return '%(aggregate)s(%(field)s)' % props

  def _get_range_borders(self, collection, query):
    """
    Compute the effective time-filter window (field, from, to, gap) for the
    collection's rolling or fixed time filter. Returns {} when no filter applies.
    """
    props = {}
    # Gap per rolling window size and widget type, sized so each widget gets a
    # reasonable number of buckets.
    GAPS = {
        '5MINUTES': {
            'histogram-widget': {'coeff': '+3', 'unit': 'SECONDS'}, # ~100 slots
            'bucket-widget': {'coeff': '+3', 'unit': 'SECONDS'}, # ~100 slots
            'bar-widget': {'coeff': '+3', 'unit': 'SECONDS'}, # ~100 slots
            'facet-widget': {'coeff': '+1', 'unit': 'MINUTES'}, # ~10 slots
        },
        '30MINUTES': {
            'histogram-widget': {'coeff': '+20', 'unit': 'SECONDS'},
            'bucket-widget': {'coeff': '+20', 'unit': 'SECONDS'},
            'bar-widget': {'coeff': '+20', 'unit': 'SECONDS'},
            'facet-widget': {'coeff': '+5', 'unit': 'MINUTES'},
        },
        '1HOURS': {
            'histogram-widget': {'coeff': '+30', 'unit': 'SECONDS'},
            'bucket-widget': {'coeff': '+30', 'unit': 'SECONDS'},
            'bar-widget': {'coeff': '+30', 'unit': 'SECONDS'},
            'facet-widget': {'coeff': '+10', 'unit': 'MINUTES'},
        },
        '12HOURS': {
            'histogram-widget': {'coeff': '+7', 'unit': 'MINUTES'},
            'bucket-widget': {'coeff': '+7', 'unit': 'MINUTES'},
            'bar-widget': {'coeff': '+7', 'unit': 'MINUTES'},
            'facet-widget': {'coeff': '+1', 'unit': 'HOURS'},
        },
        '1DAYS': {
            'histogram-widget': {'coeff': '+15', 'unit': 'MINUTES'},
            'bucket-widget': {'coeff': '+15', 'unit': 'MINUTES'},
            'bar-widget': {'coeff': '+15', 'unit': 'MINUTES'},
            'facet-widget': {'coeff': '+3', 'unit': 'HOURS'},
        },
        '2DAYS': {
            'histogram-widget': {'coeff': '+30', 'unit': 'MINUTES'},
            'bucket-widget': {'coeff': '+30', 'unit': 'MINUTES'},
            'bar-widget': {'coeff': '+30', 'unit': 'MINUTES'},
            'facet-widget': {'coeff': '+6', 'unit': 'HOURS'},
        },
        '7DAYS': {
            'histogram-widget': {'coeff': '+3', 'unit': 'HOURS'},
            'bucket-widget': {'coeff': '+3', 'unit': 'HOURS'},
            'bar-widget': {'coeff': '+3', 'unit': 'HOURS'},
            'facet-widget': {'coeff': '+1', 'unit': 'DAYS'},
        },
        '1MONTHS': {
            'histogram-widget': {'coeff': '+12', 'unit': 'HOURS'},
            'bucket-widget': {'coeff': '+12', 'unit': 'HOURS'},
            'bar-widget': {'coeff': '+12', 'unit': 'HOURS'},
            'facet-widget': {'coeff': '+5', 'unit': 'DAYS'},
        },
        '3MONTHS': {
            'histogram-widget': {'coeff': '+1', 'unit': 'DAYS'},
            'bucket-widget': {'coeff': '+1', 'unit': 'DAYS'},
            'bar-widget': {'coeff': '+1', 'unit': 'DAYS'},
            'facet-widget': {'coeff': '+30', 'unit': 'DAYS'},
        },
        '1YEARS': {
            'histogram-widget': {'coeff': '+3', 'unit': 'DAYS'},
            'bucket-widget': {'coeff': '+3', 'unit': 'DAYS'},
            'bar-widget': {'coeff': '+3', 'unit': 'DAYS'},
            'facet-widget': {'coeff': '+12', 'unit': 'MONTHS'},
        },
        '2YEARS': {
            'histogram-widget': {'coeff': '+7', 'unit': 'DAYS'},
            'bucket-widget': {'coeff': '+7', 'unit': 'DAYS'},
            'bar-widget': {'coeff': '+7', 'unit': 'DAYS'},
            'facet-widget': {'coeff': '+3', 'unit': 'MONTHS'},
        },
        '10YEARS': {
            'histogram-widget': {'coeff': '+1', 'unit': 'MONTHS'},
            'bucket-widget': {'coeff': '+1', 'unit': 'MONTHS'},
            'bar-widget': {'coeff': '+1', 'unit': 'MONTHS'},
            'facet-widget': {'coeff': '+1', 'unit': 'YEARS'},
        }
    }

    time_field = collection['timeFilter'].get('field')

    if time_field and (collection['timeFilter']['value'] != 'all' or collection['timeFilter']['type'] == 'fixed'):
      # fqs overrides main time filter
      fq_time_ids = [fq['id'] for fq in query['fqs'] if fq['field'] == time_field]
      props['time_filter_overrides'] = fq_time_ids
      props['time_field'] = time_field

      if collection['timeFilter']['type'] == 'rolling':
        props['field'] = collection['timeFilter']['field']
        props['from'] = 'NOW-%s' % collection['timeFilter']['value']
        props['to'] = 'NOW'
        props['gap'] = GAPS.get(collection['timeFilter']['value'])
      elif collection['timeFilter']['type'] == 'fixed':
        props['field'] = collection['timeFilter']['field']
        props['from'] = collection['timeFilter']['from']
        props['to'] = collection['timeFilter']['to']
        props['fixed'] = True

    return props

  def _get_time_filter_query(self, timeFilter, facet):
    """Return Solr range-facet start/end/gap for the given time filter and widget."""
    if 'fixed' in timeFilter:
      # Fixed window: derive a gap from the window span, then round start/end
      # down to the gap's unit.
      props = {}
      stat_facet = {'min': timeFilter['from'], 'max': timeFilter['to']}
      _compute_range_facet(facet['widgetType'], stat_facet, props, stat_facet['min'], stat_facet['max'])
      gap = props['gap']
      unit = re.split(r'\d+', gap)[1]  # raw string: '\d' is a regex escape
      return {
        'start': '%(from)s/%(unit)s' % {'from': timeFilter['from'], 'unit': unit},
        'end': '%(to)s/%(unit)s' % {'to': timeFilter['to'], 'unit': unit},
        'gap': '%(gap)s' % props, # add a 'auto'
      }
    else:
      # Rolling window: gap was precomputed per widget type in GAPS.
      gap = timeFilter['gap'][facet['widgetType']]
      return {
        'start': '%(from)s/%(unit)s' % {'from': timeFilter['from'], 'unit': gap['unit']},
        'end': '%(to)s/%(unit)s' % {'to': timeFilter['to'], 'unit': gap['unit']},
        'gap': '%(coeff)s%(unit)s/%(unit)s' % gap, # add a 'auto'
      }

  def _get_fq(self, collection, query):
    """
    Translate the dashboard's filter queries (field/range/range-up/map) plus
    the collection time filter into Solr 'fq' parameters.
    """
    params = ()
    timeFilter = {}

    if collection:
      timeFilter = self._get_range_borders(collection, query)
    if timeFilter and not timeFilter.get('time_filter_overrides'):
      params += (('fq', urllib.unquote(utf_quoter('%(field)s:[%(from)s TO %(to)s]' % timeFilter))),)

    # Merge facets queries on same fields
    grouped_fqs = groupby(query['fqs'], lambda x: (x['type'], x['field']))
    merged_fqs = []
    for key, group in grouped_fqs:
      field_fq = next(group)
      for fq in group:
        for f in fq['filter']:
          field_fq['filter'].append(f)
      merged_fqs.append(field_fq)

    for fq in merged_fqs:
      if fq['type'] == 'field':
        fields = fq['field'] if type(fq['field']) == list else [fq['field']] # 2D facets support
        for field in fields:
          f = []
          for _filter in fq['filter']:
            values = _filter['value'] if type(_filter['value']) == list else [_filter['value']] # 2D facets support
            if fields.index(field) < len(values): # Lowest common field denominator
              value = values[fields.index(field)]
              exclude = '-' if _filter['exclude'] else ''
              if value is not None and ' ' in force_unicode(value):
                value = force_unicode(value).replace('"', '\\"')
                f.append('%s%s:"%s"' % (exclude, field, value))
              else:
                f.append('%s{!field f=%s}%s' % (exclude, field, value))
          _params = '{!tag=%(id)s}' % fq + ' '.join(f)
          params += (('fq', urllib.unquote(utf_quoter(_params))),)
      elif fq['type'] == 'range':
        params += (('fq', '{!tag=%(id)s}' % fq + ' '.join([urllib.unquote(
                    utf_quoter('%s%s:[%s TO %s}' % ('-' if field['exclude'] else '', fq['field'], f['from'], f['to'])))
                    for field, f in zip(fq['filter'], fq['properties'])])),)
      elif fq['type'] == 'range-up':
        params += (('fq', '{!tag=%(id)s}' % fq + ' '.join([urllib.unquote(
                    utf_quoter('%s%s:[%s TO %s}' % ('-' if field['exclude'] else '', fq['field'],
                                                    f['from'] if fq['is_up'] else '*',
                                                    '*' if fq['is_up'] else f['from'])))
                    for field, f in zip(fq['filter'], fq['properties'])])),)
      elif fq['type'] == 'map':
        _keys = fq.copy()
        _keys.update(fq['properties'])
        params += (('fq', '{!tag=%(id)s}' % fq + urllib.unquote(
                    utf_quoter('%(lat)s:[%(lat_sw)s TO %(lat_ne)s} AND %(lon)s:[%(lon_sw)s TO %(lon_ne)s}' % _keys))),)

    return params

  def query(self, collection, query):
    """
    Build and execute the full /select request for a dashboard: main query,
    classic facets, JSON facets, filter queries, field list, highlighting and
    sorting. Returns the parsed JSON response.
    """
    solr_query = {}
    solr_query['collection'] = collection['name']

    if query.get('download'):
      solr_query['rows'] = 1000
      solr_query['start'] = 0
    else:
      solr_query['rows'] = int(collection['template']['rows'] or 10)
      solr_query['start'] = int(query['start'])

    # Hard caps to protect Solr from runaway paging.
    solr_query['rows'] = min(solr_query['rows'], 1000)
    solr_query['start'] = min(solr_query['start'], 10000)

    params = self._get_params() + (
        ('q', self._get_q(query)),
        ('wt', 'json'),
        ('rows', solr_query['rows']),
        ('start', solr_query['start']),
    )

    if any(collection['facets']):
      params += (
        ('facet', 'true'),
        ('facet.mincount', 0),
        ('facet.limit', 10),
      )
      json_facets = {}

      timeFilter = self._get_range_borders(collection, query)

      for facet in collection['facets']:
        if facet['type'] == 'query':
          params += (('facet.query', '%s' % facet['field']),)
        elif facet['type'] == 'range' or facet['type'] == 'range-up':
          keys = {
              'id': '%(id)s' % facet,
              'field': facet['field'],
              'key': '%(field)s-%(id)s' % facet,
              'start': facet['properties']['start'],
              'end': facet['properties']['end'],
              'gap': facet['properties']['gap'],
              'mincount': int(facet['properties']['mincount'])
          }

          # The collection time filter takes over the facet borders unless this
          # facet's own fq overrides it (histogram widgets keep the override).
          if timeFilter and timeFilter['time_field'] == facet['field'] and (facet['id'] not in timeFilter['time_filter_overrides'] or facet['widgetType'] != 'histogram-widget'):
            keys.update(self._get_time_filter_query(timeFilter, facet))

          params += (
             ('facet.range', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.range.start=%(start)s f.%(field)s.facet.range.end=%(end)s f.%(field)s.facet.range.gap=%(gap)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys),
          )
        elif facet['type'] == 'field':
          keys = {
              'id': '%(id)s' % facet,
              'field': facet['field'],
              'key': '%(field)s-%(id)s' % facet,
              # +1 row so the widget can tell whether more values exist.
              'limit': int(facet['properties'].get('limit', 10)) + (1 if facet['widgetType'] == 'facet-widget' else 0),
              'mincount': int(facet['properties']['mincount'])
          }

          params += (
              ('facet.field', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys),
          )
        elif facet['type'] == 'nested':
          _f = {
              'field': facet['field'],
              'limit': int(facet['properties'].get('limit', 10)) + (1 if facet['widgetType'] == 'facet-widget' else 0),
              'mincount': int(facet['properties']['mincount'])
          }

          if 'start' in facet['properties']:
            _f.update({
                'type': 'range',
                'start': facet['properties']['start'],
                'end': facet['properties']['end'],
                'gap': facet['properties']['gap'],
            })
            if timeFilter and timeFilter['time_field'] == facet['field'] and (facet['id'] not in timeFilter['time_filter_overrides'] or facet['widgetType'] != 'bucket-widget'):
              _f.update(self._get_time_filter_query(timeFilter, facet))
          else:
            _f.update({
                'type': 'terms',
                'field': facet['field'],
                'excludeTags': facet['id']
            })

          if facet['properties']['facets']:
            if facet['properties']['facets'][0]['aggregate'] == 'count':
              _f['facet'] = {
                  'd2': {
                      'type': 'terms',
                      'field': '%(field)s' % facet['properties']['facets'][0],
                      'limit': int(facet['properties']['facets'][0].get('limit', 10)),
                      'mincount': int(facet['properties']['facets'][0]['mincount'])
                  }
              }
              if len(facet['properties']['facets']) > 1: # Get 3rd dimension calculation
                _f['facet']['d2']['facet'] = {
                    'd2': self._get_aggregate_function(facet['properties']['facets'][1])
                }
            else:
              _f['facet'] = {
                  'd2': self._get_aggregate_function(facet['properties']['facets'][0])
              }

          json_facets[facet['id']] = _f
        elif facet['type'] == 'function':
          json_facets[facet['id']] = self._get_aggregate_function(facet)
          json_facets['processEmpty'] = True
        elif facet['type'] == 'pivot':
          if facet['properties']['facets'] or facet['widgetType'] == 'map-widget':
            fields = facet['field']
            fields_limits = []
            for f in facet['properties']['facets']:
              fields_limits.append('f.%s.facet.limit=%s' % (f['field'], f['limit']))
              fields_limits.append('f.%s.facet.mincount=%s' % (f['field'], f['mincount']))
              fields += ',' + f['field']
            keys = {
                'id': '%(id)s' % facet,
                'key': '%(field)s-%(id)s' % facet,
                'field': facet['field'],
                'fields': fields,
                'limit': int(facet['properties'].get('limit', 10)),
                'mincount': int(facet['properties']['mincount']),
                'fields_limits': ' '.join(fields_limits)
            }
            params += (
                ('facet.pivot', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s %(fields_limits)s}%(fields)s' % keys),
            )

      if json_facets:
        params += (
            ('json.facet', json.dumps(json_facets)),
        )

    params += self._get_fq(collection, query)

    if collection['template']['fieldsSelected'] and collection['template']['isGridLayout']:
      fields = set(collection['template']['fieldsSelected'] + [collection['idField']] if collection['idField'] else [])  # Add field if needed
      if collection['template']['leafletmap'].get('latitudeField'):
        fields.add(collection['template']['leafletmap']['latitudeField'])
      if collection['template']['leafletmap'].get('longitudeField'):
        fields.add(collection['template']['leafletmap']['longitudeField'])
      if collection['template']['leafletmap'].get('labelField'):
        fields.add(collection['template']['leafletmap']['labelField'])
      params += (('fl', urllib.unquote(utf_quoter(','.join(list(fields))))),)
    else:
      params += (('fl', '*'),)

    params += (
      ('hl', 'true'),
      ('hl.fl', '*'),
      ('hl.snippets', 5),
      ('hl.fragsize', 1000),
    )

    if collection['template']['fieldsSelected']:
      fields = []
      for field in collection['template']['fieldsSelected']:
        # list() so indexing also works when filter() returns an iterator (py3).
        attribute_field = list(filter(lambda attribute: field == attribute['name'], collection['template']['fieldsAttributes']))
        if attribute_field:
          if attribute_field[0]['sort']['direction']:
            fields.append('%s %s' % (field, attribute_field[0]['sort']['direction']))
      if fields:
        params += (
          ('sort', ','.join(fields)),
        )

    response = self._root.get('%(collection)s/select' % solr_query, params)

    return self._get_json(response)

  def suggest(self, collection, query):
    """Run the suggester on the collection for query['q'], building it first."""
    try:
      params = self._get_params() + (
          ('suggest', 'true'),
          ('suggest.build', 'true'),
          ('suggest.q', query['q']),
          ('wt', 'json'),
      )
      if query.get('dictionary'):
        params += (
            ('suggest.dictionary', query['dictionary']),
        )
      response = self._root.get('%s/suggest' % collection, params)
      return self._get_json(response)
    # 'except E as e' (not 'except E, e') for consistency with the rest of the
    # file and Python 3 compatibility.
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))
class OozieApi(object):
  """Client for the Oozie Web Services REST API."""

  def __init__(self, oozie_url, security_enabled=False):
    self._url = posixpath.join(oozie_url, API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    if security_enabled:
      self._client.set_kerberos_auth()
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    # To store user info
    self._thread_local = threading.local()

  def __str__(self):
    return "OozieApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  @property
  def user(self):
    # Falls back to the service default when no per-request user was set.
    try:
      return self._thread_local.user
    except AttributeError:
      return DEFAULT_USER

  def setuser(self, user):
    """Return the previous user"""
    prev = self.user
    self._thread_local.user = user
    return prev

  def _get_params(self):
    # Query-string parameters common to every request: impersonation + timezone.
    if self.security_enabled:
      return {'doAs': self.user, 'timezone': TIME_ZONE.get()}
    return {'user.name': DEFAULT_USER, 'doAs': self.user, 'timezone': TIME_ZONE.get()}

  def _get_oozie_properties(self, properties=None):
    # Base Oozie job configuration; caller-supplied properties win on conflict.
    defaults = {
      'user.name': self.user,
    }

    if properties is not None:
      defaults.update(properties)

    return defaults

  VALID_JOB_FILTERS = ('name', 'user', 'group', 'status')

  def get_jobs(self, jobtype, offset=None, cnt=None, **kwargs):
    """
    Get a list of Oozie jobs.

    jobtype is 'wf', 'coord' or 'bundle'. Note that offset is 1-based.
    kwargs is used for filtering and may be one of VALID_JOB_FILTERS:
    name, user, group, status
    """
    params = self._get_params()
    if offset is not None:
      params['offset'] = str(offset)
    if cnt is not None:
      params['len'] = str(cnt)
    params['jobtype'] = jobtype

    filter_list = []
    # items() instead of iteritems(): identical here and works on Python 3 too.
    for key, val in kwargs.items():
      if key not in OozieApi.VALID_JOB_FILTERS:
        raise ValueError('"%s" is not a valid filter for selecting jobs' % (key,))
      filter_list.append('%s=%s' % (key, val))
    params['filter'] = ';'.join(filter_list)

    # Send the request
    resp = self._root.get('jobs', params)
    if jobtype == 'wf':
      wf_list = WorkflowList(self, resp, filters=kwargs)
    elif jobtype == 'coord':
      wf_list = CoordinatorList(self, resp, filters=kwargs)
    else:
      wf_list = BundleList(self, resp, filters=kwargs)
    return wf_list

  def get_workflows(self, offset=None, cnt=None, **kwargs):
    return self.get_jobs('wf', offset, cnt, **kwargs)

  def get_coordinators(self, offset=None, cnt=None, **kwargs):
    return self.get_jobs('coord', offset, cnt, **kwargs)

  def get_bundles(self, offset=None, cnt=None, **kwargs):
    return self.get_jobs('bundle', offset, cnt, **kwargs)

  # TODO: make get_job accept any jobid
  def get_job(self, jobid):
    """
    get_job(jobid) -> Workflow
    """
    params = self._get_params()
    resp = self._root.get('job/%s' % (jobid,), params)
    wf = Workflow(self, resp)
    return wf

  def get_coordinator(self, jobid):
    params = self._get_params()
    # len=-1 asks Oozie for every action of the coordinator.
    params.update({'len': -1})
    resp = self._root.get('job/%s' % (jobid,), params)
    return Coordinator(self, resp)

  def get_bundle(self, jobid):
    params = self._get_params()
    resp = self._root.get('job/%s' % (jobid,), params)
    return Bundle(self, resp)

  def get_job_definition(self, jobid):
    """
    get_job_definition(jobid) -> Definition (xml string)
    """
    params = self._get_params()
    params['show'] = 'definition'
    xml = self._root.get('job/%s' % (jobid,), params)
    return xml

  def get_job_log(self, jobid):
    """
    get_job_log(jobid) -> Log (xml string)
    """
    params = self._get_params()
    params['show'] = 'log'
    xml = self._root.get('job/%s' % (jobid,), params)
    return xml

  def get_action(self, action_id):
    # The action id embeds the job type: C@ = coordinator, B@ = bundle.
    if 'C@' in action_id:
      Klass = CoordinatorAction
    elif 'B@' in action_id:
      Klass = BundleAction
    else:
      Klass = WorkflowAction
    params = self._get_params()
    resp = self._root.get('job/%s' % (action_id,), params)
    return Klass(resp)

  def job_control(self, jobid, action, properties=None, parameters=None):
    """
    job_control(jobid, action) -> None
    Raise RestException on error.
    """
    if action not in ('start', 'suspend', 'resume', 'kill', 'rerun', 'coord-rerun', 'bundle-rerun'):
      msg = 'Invalid oozie job action: %s' % (action,)
      LOG.error(msg)
      raise ValueError(msg)
    properties = self._get_oozie_properties(properties)
    params = self._get_params()
    params['action'] = action
    if parameters is not None:
      params.update(parameters)

    return self._root.put('job/%s' % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def submit_workflow(self, application_path, properties=None):
    """
    submit_workflow(application_path, properties=None) -> jobid

    Raise RestException on error.
    """
    defaults = {
      'oozie.wf.application.path': application_path,
      'user.name': self.user,
    }

    if properties is not None:
      defaults.update(properties)
    properties = defaults

    return self.submit_job(properties)

  # Is name actually submit_coord?
  def submit_job(self, properties=None):
    """
    submit_job(properties=None, id=None) -> jobid

    Raise RestException on error.
    """
    defaults = {
      'user.name': self.user,
    }

    if properties is not None:
      defaults.update(properties)

    properties = defaults

    params = self._get_params()
    resp = self._root.post('jobs', params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)
    return resp['id']

  def rerun(self, jobid, properties=None, params=None):
    properties = self._get_oozie_properties(properties)
    if params is None:
      params = self._get_params()
    else:
      # Merge the default params (doAs/timezone) underneath the caller's.
      # BUG FIX: the previous code did `self._get_params().update(params)`,
      # which updated a temporary dict and threw it away, so the defaults
      # never reached the request when params was supplied.
      merged = self._get_params()
      merged.update(params)
      params = merged

    params['action'] = 'rerun'

    return self._root.put('job/%s' % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def get_build_version(self):
    """
    get_build_version() -> Build version (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/build-version', params)
    return resp

  def get_instrumentation(self):
    params = self._get_params()
    resp = self._root.get('admin/instrumentation', params)
    return resp

  def get_configuration(self):
    """
    get_configuration() -> Oozie config (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/configuration', params)
    return resp

  def get_oozie_status(self):
    """
    get_oozie_status() -> Oozie status (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/status', params)
    return resp
class SolrApi(object):
  """
  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """
  def __init__(self, solr_url, user, security_enabled=SECURITY_ENABLED.get()):
    self._url = solr_url
    self._user = user
    self._client = HttpClient(self._url, logger=LOG)
    self.security_enabled = security_enabled

    if self.security_enabled:
      self._client.set_kerberos_auth()

    self._root = resource.Resource(self._client)

  def _get_params(self):
    # Impersonation parameters appended to every request.
    if self.security_enabled:
      return (('doAs', self._user),)
    return (('user.name', DEFAULT_USER), ('doAs', self._user),)

  def _get_q(self, query):
    """Build the main 'q' parameter by OR-ing the query strings."""
    q_template = '(%s)' if len(query['qs']) >= 2 else '%s'
    return 'OR'.join([q_template % (q['q'] or EMPTY_QUERY.get()) for q in query['qs']]).encode('utf-8')

  def _get_aggregate_function(self, facet):
    """Translate a facet aggregate spec into a Solr JSON facet function string."""
    props = {
        'field': facet['field'],
        'aggregate': facet['properties']['aggregate'] if 'properties' in facet else facet['aggregate']
    }

    if props['aggregate'] == 'median':
      # Solr has no median(); the 50th percentile is equivalent.
      return 'percentile(%(field)s,50)' % props
    else:
      return '%(aggregate)s(%(field)s)' % props

  def _get_fq(self, query):
    """Translate the query's filter queries (field/range/range-up) into Solr 'fq' params."""
    params = ()

    # Merge facets queries on same fields
    grouped_fqs = groupby(query['fqs'], lambda x: (x['type'], x['field']))
    merged_fqs = []
    for key, group in grouped_fqs:
      field_fq = next(group)
      for fq in group:
        for f in fq['filter']:
          field_fq['filter'].append(f)
      merged_fqs.append(field_fq)

    for fq in merged_fqs:
      if fq['type'] == 'field':
        fields = fq['field'] if type(fq['field']) == list else [fq['field']] # 2D facets support
        for field in fields:
          f = []
          for _filter in fq['filter']:
            values = _filter['value'] if type(_filter['value']) == list else [_filter['value']] # 2D facets support
            if fields.index(field) < len(values): # Lowest common field denominator
              value = values[fields.index(field)]
              exclude = '-' if _filter['exclude'] else ''
              if value is not None and ' ' in smart_str(value):
                f.append('%s%s:"%s"' % (exclude, field, value))
              else:
                f.append('%s{!field f=%s}%s' % (exclude, field, value))
          _params = '{!tag=%(id)s}' % fq + ' '.join(f)
          params += (('fq', urllib.unquote(utf_quoter(_params))),)
      elif fq['type'] == 'range':
        params += (('fq', '{!tag=%(id)s}' % fq + ' '.join([urllib.unquote(
                    utf_quoter('%s%s:[%s TO %s}' % ('-' if field['exclude'] else '', fq['field'], f['from'], f['to'])))
                    for field, f in zip(fq['filter'], fq['properties'])])),)
      elif fq['type'] == 'range-up':
        params += (('fq', '{!tag=%(id)s}' % fq + ' '.join([urllib.unquote(
                    utf_quoter('%s%s:[%s TO %s}' % ('-' if field['exclude'] else '', fq['field'],
                                                    f['from'] if fq['is_up'] else '*',
                                                    '*' if fq['is_up'] else f['from'])))
                    for field, f in zip(fq['filter'], fq['properties'])])),)

    return params

  def query(self, collection, query):
    """
    Build and execute the /select request: main query, classic facets, JSON
    facets, filter queries, field list, highlighting and sorting.
    Returns the parsed JSON response.
    """
    solr_query = {}
    solr_query['collection'] = collection['name']

    if query.get('download'):
      solr_query['rows'] = 1000
      solr_query['start'] = 0
    else:
      solr_query['rows'] = int(collection['template']['rows'] or 10)
      solr_query['start'] = int(query['start'])

    # Hard caps to protect Solr from runaway paging.
    solr_query['rows'] = min(solr_query['rows'], 1000)
    solr_query['start'] = min(solr_query['start'], 10000)

    params = self._get_params() + (
        ('q', self._get_q(query)),
        ('wt', 'json'),
        ('rows', solr_query['rows']),
        ('start', solr_query['start']),
    )

    if any(collection['facets']):
      params += (
        ('facet', 'true'),
        ('facet.mincount', 0),
        ('facet.limit', 10),
      )
      json_facets = {}

      for facet in collection['facets']:
        if facet['type'] == 'query':
          params += (('facet.query', '%s' % facet['field']),)
        elif facet['type'] == 'range' or facet['type'] == 'range-up':
          keys = {
              'id': '%(id)s' % facet,
              'field': facet['field'],
              'key': '%(field)s-%(id)s' % facet,
              'start': facet['properties']['start'],
              'end': facet['properties']['end'],
              'gap': facet['properties']['gap'],
              'mincount': int(facet['properties']['mincount'])
          }

          params += (
              ('facet.range', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.range.start=%(start)s f.%(field)s.facet.range.end=%(end)s f.%(field)s.facet.range.gap=%(gap)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys),
          )
        elif facet['type'] == 'field':
          keys = {
              'id': '%(id)s' % facet,
              'field': facet['field'],
              'key': '%(field)s-%(id)s' % facet,
              # +1 row so the widget can tell whether more values exist.
              'limit': int(facet['properties'].get('limit', 10)) + (1 if facet['widgetType'] == 'facet-widget' else 0),
              'mincount': int(facet['properties']['mincount'])
          }

          params += (
              ('facet.field', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys),
          )
        elif facet['type'] == 'nested':
          _f = {
              'field': facet['field'],
              'limit': int(facet['properties'].get('limit', 10)) + (1 if facet['widgetType'] == 'facet-widget' else 0),
              'mincount': int(facet['properties']['mincount'])
          }

          if 'start' in facet['properties']:
            _f.update({
                'type': 'range',
                'start': facet['properties']['start'],
                'end': facet['properties']['end'],
                'gap': facet['properties']['gap'],
            })
          else:
            _f.update({
                'type': 'terms',
                'field': facet['field'],
                'excludeTags': facet['id']
            })

          if facet['properties']['facets']:
            if facet['properties']['facets'][0]['aggregate'] == 'count':
              _f['facet'] = {
                  'd2': {
                      'type': 'terms',
                      'field': '%(field)s' % facet['properties']['facets'][0]
                  }
              }
              if len(facet['properties']['facets']) > 1: # Get 3rd dimension calculation
                _f['facet']['d2']['facet'] = {
                    'd2': self._get_aggregate_function(facet['properties']['facets'][1])
                }
            else:
              _f['facet'] = {
                  'd2': self._get_aggregate_function(facet['properties']['facets'][0])
              }

          json_facets[facet['id']] = _f
        elif facet['type'] == 'function':
          json_facets[facet['id']] = self._get_aggregate_function(facet)
        elif facet['type'] == 'pivot':
          if facet['properties']['facets'] or facet['widgetType'] == 'map-widget':
            fields = facet['field']
            fields_limits = []
            for f in facet['properties']['facets']:
              fields_limits.append('f.%s.facet.limit=%s' % (f['field'], f['limit']))
              fields_limits.append('f.%s.facet.mincount=%s' % (f['field'], f['mincount']))
              fields += ',' + f['field']
            keys = {
                'id': '%(id)s' % facet,
                'key': '%(field)s-%(id)s' % facet,
                'field': facet['field'],
                'fields': fields,
                'limit': int(facet['properties'].get('limit', 10)),
                'mincount': int(facet['properties']['mincount']),
                'fields_limits': ' '.join(fields_limits)
            }
            params += (
                ('facet.pivot', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s %(fields_limits)s}%(fields)s' % keys),
            )

      if json_facets:
        params += (
            ('json.facet', json.dumps(json_facets)),
        )

    params += self._get_fq(query)

    if collection['template']['fieldsSelected'] and collection['template']['isGridLayout']:
      fields = set(collection['template']['fieldsSelected'] + [collection['idField']] if collection['idField'] else [])  # Add field if needed
      if collection['template']['leafletmap'].get('latitudeField'):
        fields.add(collection['template']['leafletmap']['latitudeField'])
      if collection['template']['leafletmap'].get('longitudeField'):
        fields.add(collection['template']['leafletmap']['longitudeField'])
      if collection['template']['leafletmap'].get('labelField'):
        fields.add(collection['template']['leafletmap']['labelField'])
      params += (('fl', urllib.unquote(utf_quoter(','.join(list(fields))))),)
    else:
      params += (('fl', '*'),)

    params += (
      ('hl', 'true'),
      ('hl.fl', '*'),
      ('hl.snippets', 3),
      ('hl.fragsize', 0),
    )

    if collection['template']['fieldsSelected']:
      fields = []
      for field in collection['template']['fieldsSelected']:
        # list() so indexing also works when filter() returns an iterator (py3).
        attribute_field = list(filter(lambda attribute: field == attribute['name'], collection['template']['fieldsAttributes']))
        if attribute_field:
          if attribute_field[0]['sort']['direction']:
            fields.append('%s %s' % (field, attribute_field[0]['sort']['direction']))
      if fields:
        params += (
          ('sort', ','.join(fields)),
        )

    response = self._root.get('%(collection)s/select' % solr_query, params)

    return self._get_json(response)

  def suggest(self, solr_query, hue_core):
    """Run the collection's suggest handler for solr_query['q']."""
    try:
      params = self._get_params() + (
          ('q', solr_query['q']),
          ('wt', 'json'),
      )
      response = self._root.get('%(collection)s/suggest' % solr_query, params)
      return self._get_json(response)
    # 'except E as e' (not 'except E, e') for consistency with the rest of the
    # file and Python 3 compatibility.
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))
class AtlasApi(Api):
  """
  Data catalog client backed by Apache Atlas (https://atlas.apache.org).

  Adapts Atlas V2 REST API responses into the Navigator-style entity
  structure the rest of Hue expects (see adapt_atlas_entity_to_navigator).
  """

  # Fields searched by _get_boosted_term, with their relative boost weights.
  DEFAULT_SEARCH_FIELDS = (('originalName', 3), ('originalDescription', 1), ('name', 10), ('description', 3), ('tags', 5))
  CATALOG_NAMESPACE = '__cloudera_internal_catalog_hue'

  # Mappings between Hue/Navigator entity type names and Atlas type names.
  NAV_TO_ATLAS_TYPE = {
    'table': 'hive_table',
    'database': 'hive_db',
    'field': 'hive_column'
  }

  ATLAS_TO_NAV_TYPE = {
    'hive_table': 'TABLE',
    'hive_db': 'DATABASE',
    'hive_column': 'FIELD'
  }

  # Facet syntax accepted inside free-text queries, e.g. 'tag:pii type:table owner:joe'.
  CLASSIFICATION_RE = re.compile('(?:tag|tags|classification)\s*\:\s*(?:(?:\"([^"]+)\")|([^ ]+))\s*', re.IGNORECASE)
  TYPE_RE = re.compile('type\s*\:\s*([^ ]+)\s*', re.IGNORECASE)
  OWNER_RE = re.compile('owner\s*\:\s*([^ ]+)\s*', re.IGNORECASE)

  def __init__(self, user=None):
    super(AtlasApi, self).__init__(user)

    self._api_url = CATALOG.API_URL.get().strip('/') + "/api/atlas"
    self._username = CATALOG.SERVER_USER.get()
    self._password = CATALOG.SERVER_PASSWORD.get()

    self._client = HttpClient(self._api_url, logger=LOG)

    # Kerberos takes precedence; basic auth only when a password is configured.
    if CATALOG.KERBEROS_ENABLED.get():
      self._client.set_kerberos_auth()
    elif self._password:
      self._client.set_basic_auth(self._username, self._password)

    self._root = resource.Resource(self._client, urlencode=False)  # For search_entities_interactive

    self.__headers = {}
    self.__params = ()

    #self._fillup_properties() # Disabled currently

  def _get_types_from_sources(self, sources):
    """Return (default_entity_types, entity_types) tuples for the given source names."""
    default_entity_types = entity_types = ('DATABASE', 'TABLE', 'PARTITION', 'FIELD', 'FILE', 'VIEW', 'S3BUCKET', 'OPERATION', 'DIRECTORY')

    if 'sql' in sources or 'hive' in sources or 'impala' in sources:
      entity_types = ('TABLE', 'VIEW', 'DATABASE', 'PARTITION', 'FIELD')
      default_entity_types = ('TABLE', 'VIEW')
    elif 'hdfs' in sources:
      entity_types = ('FILE', 'DIRECTORY')
      default_entity_types = ('FILE', 'DIRECTORY')
    elif 's3' in sources:
      entity_types = ('FILE', 'DIRECTORY', 'S3BUCKET')
      default_entity_types = ('DIRECTORY', 'S3BUCKET')

    return default_entity_types, entity_types

  def adapt_atlas_entity_to_navigator(self, atlas_entity):
    """Convert one Atlas entity dict into the Navigator-shaped dict used by Hue."""
    nav_entity = {
      "created": 'createTime' in atlas_entity['attributes'] and atlas_entity['attributes']['createTime'],
      "customProperties": None,
      "description": atlas_entity['attributes'].get('description'),
      "identity": atlas_entity['guid'],
      "internalType": atlas_entity['typeName'],
      "meaningNames": atlas_entity['meaningNames'],  # Atlas specific
      "meanings": atlas_entity['meanings'],  # Atlas specific
      "name": atlas_entity['attributes'].get('name'),
      "original_name": atlas_entity['attributes'].get('name'),
      "originalDescription": None,
      "originalName": atlas_entity['attributes'].get('name'),
      "owner": atlas_entity['attributes'].get('owner'),
      "parentPath": '',  # Set below
      "properties": {},  # Set below
      "sourceType": '',  # Set below
      "classifications": [],
      "tags": atlas_entity['classificationNames'],
      "type": self.ATLAS_TO_NAV_TYPE.get(atlas_entity['typeName'].lower()) or atlas_entity['typeName']
    }

    # Convert Atlas qualified name of form db.tbl.col@cluster to parentPath of form /db/tbl
    if atlas_entity['typeName'].lower().startswith('hive_'):
      nav_entity['sourceType'] = 'HIVE'
      qualified_path_parts = re.sub(r'@.*$', '', atlas_entity['attributes'].get('qualifiedName')).split('.')
      qualified_path_parts.pop()  # it's just the parent path we want so remove the entity name
      nav_entity['parentPath'] = '/' + '/'.join(qualified_path_parts)

    if 'classifications' in atlas_entity:
      nav_entity['classifications'] = atlas_entity['classifications']
      # Flatten classification attributes into the entity's properties dict.
      for atlas_classification in atlas_entity['classifications']:
        if 'attributes' in atlas_classification:
          for key, value in atlas_classification['attributes'].iteritems():
            nav_entity['properties'][key] = value

    return nav_entity

  def fetch_single_entity(self, dsl_query):
    '''
    Run an Atlas DSL query expected to match a single entity and return it in
    Navigator shape. Raises CatalogEntityDoesNotExistException when nothing matches.

    REQUEST: hue:8889/metadata/api/navigator/find_entity?type=database&name=default
    SAMPLE response for Navigator find_entity response
    {"status": 0, "entity": {
      "customProperties": null, "deleteTime": null,
      "fileSystemPath": "hdfs://nightly6x-1.vpc.cloudera.com:8020/user/hive/warehouse",
      "description": null, "params": null, "type": "DATABASE",
      "internalType": "hv_database", "sourceType": "HIVE", "tags": [],
      "deleted": false, "technicalProperties": null, "userEntity": false,
      "originalDescription": "Default Hive database", "metaClassName": "hv_database",
      "properties": {"__cloudera_internal__hueLink": "https://nightly6x-1.vpc.cloudera.com:8889/hue/metastore/tables/default"},
      "identity": "23", "firstClassParentId": null, "name": null,
      "extractorRunId": "7##1", "sourceId": "7", "packageName": "nav",
      "parentPath": null, "originalName": "default"}}
    '''
    response = {"status": 0, "entity": []}
    try:
      atlas_response = self._root.get('/v2/search/dsl?query=%s' % dsl_query, headers=self.__headers, params=self.__params)
      if not 'entities' in atlas_response or len(atlas_response['entities']) < 1:
        raise CatalogEntityDoesNotExistException('Could not find entity with query: %s' % dsl_query)

      for atlas_entity in atlas_response['entities']:
        response['entity'].append(self.adapt_atlas_entity_to_navigator(atlas_entity))

      # Only the first match is returned, even if the query matched several.
      return response['entity'][0]
    except RestException as e:
      if e.code == 401:
        raise raise_popup_exception('Hue could not authenticate to Atlas', detail=e)
      else:
        raise raise_popup_exception('Hue could not query Atlas', detail=e)

  def get_database(self, name):
    # Search with Atlas API for hive database with specific name
    if get_catalog_search_cluster():
      qualifiedNameCriteria = 'qualifiedName=\'%s@%s\'' % (name, get_catalog_search_cluster())
    else:
      qualifiedNameCriteria = 'qualifiedName like \'%s@*\'' % name

    return self.fetch_single_entity('hive_db where %s' % qualifiedNameCriteria)

  def get_table(self, database_name, table_name, is_view=False):
    # Search with Atlas API for hive tables with specific name
    if get_catalog_search_cluster():
      qualifiedNameCriteria = 'qualifiedName=\'%s.%s@%s\'' % (database_name, table_name, get_catalog_search_cluster())
    else:
      qualifiedNameCriteria = 'qualifiedName like \'%s.%s@*\'' % (database_name, table_name)

    return self.fetch_single_entity('hive_table where %s' % qualifiedNameCriteria)

  def get_field(self, database_name, table_name, field_name):
    # Search with Atlas API for hive tables with specific qualified name
    if get_catalog_search_cluster():
      qualifiedNameCriteria = 'qualifiedName=\'%s.%s.%s@%s\'' % (database_name, table_name, field_name, get_catalog_search_cluster())
    else:
      qualifiedNameCriteria = 'qualifiedName like \'%s.%s.%s@*\'' % (database_name, table_name, field_name)

    return self.fetch_single_entity('hive_column where %s' % qualifiedNameCriteria)

  def search_entities_interactive(self, query_s=None, limit=100, offset=0, facetFields=None, facetPrefix=None, facetRanges=None, filterQueries=None, firstClassEntitiesOnly=None, sources=None):
    """
    Free-text entity search against Atlas basic search, with support for
    'tag:', 'type:' and 'owner:' facets embedded in the query string.
    Returns a Navigator-shaped response dict with 'results' and 'facets'.
    """
    response = {"status": 0, "results": [], "facets": {"tags": {}}}

    # This takes care of the list_tags endpoint
    if not query_s and facetFields and 'tags' in facetFields:
      classification_response = self._root.get('/v2/types/typedefs?type=classification')
      for classification_def in classification_response['classificationDefs']:
        # Names containing a space are quoted so the facet value stays one token.
        if ' ' in classification_def['name']:
          response['facets']['tags']['"' + classification_def['name'] + '"'] = -1
        else:
          response['facets']['tags'][classification_def['name']] = -1
      return response

    query_s = (query_s.strip() if query_s else '').replace('*', '')

    atlas_type = None
    classification = None
    owner = None

    # Take the first classification and type facets and ignore other as we can't search multiple in Atlas.
    classification_facets = self.CLASSIFICATION_RE.findall(query_s)
    if classification_facets:
      classification = classification_facets[0][0] or classification_facets[0][1]
      query_s = self.CLASSIFICATION_RE.sub('', query_s).strip()
      atlas_type = 'Asset'  # Filtered below to just contain hive_db, hive_table or hive_column

    owner_facets = self.OWNER_RE.findall(query_s)
    if owner_facets:
      owner = owner_facets[0]
      query_s = self.OWNER_RE.sub('', query_s).strip()

    type_facets = self.TYPE_RE.findall(query_s)
    if type_facets:
      # NOTE(review): dict indexing raises KeyError for an unknown type facet,
      # so the `or type_facets[0]` fallback is never reached — `.get(...)` was
      # likely intended. Left as-is (behavior-preserving pass).
      atlas_type = self.NAV_TO_ATLAS_TYPE[type_facets[0].lower()] or type_facets[0]
      query_s = self.TYPE_RE.sub('', query_s).strip()

    data = {
      'attributes': None,
      'classification': classification,
      'entityFilters': {
        'condition': 'AND',
        'criterion': [{
          'condition': 'OR',
          'criterion': [{
            'attributeName': 'name',
            'attributeValue': query_s,
            'operator': 'contains'
          }, {
            'attributeName': 'description',
            'attributeValue': query_s,
            'operator': 'contains'
          }]
        }]
      },
      'excludeDeletedEntities': True,
      'includeClassificationAttributes': True,
      'includeSubClassifications': True,
      'includeSubTypes': True,
      'limit': limit,
      'offset': 0,  # NOTE(review): the `offset` parameter is not forwarded — confirm whether pagination is intended here.
      'tagFilters': None,
      'termName': None,
      'typeName': atlas_type or 'hive_table'
    }

    if get_catalog_search_cluster():
      data['entityFilters']['criterion'].append({
        'attributeName': 'qualifiedName',
        'operator': 'contains',
        'attributeValue': '@' + get_catalog_search_cluster()
      })

    if owner:
      data['entityFilters']['criterion'].append({
        'attributeName': 'owner',
        'operator': 'startsWith',
        'attributeValue': owner
      })

    try:
      atlas_response = self._root.post('/v2/search/basic', data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE)

      # Adapt Atlas entities to Navigator structure in the results
      if 'entities' in atlas_response:
        for atlas_entity in atlas_response['entities']:
          # When searching the generic 'Asset' type, keep only Hive entities.
          if atlas_type != 'Asset' or atlas_entity['typeName'].lower() in ['hive_db', 'hive_table', 'hive_column']:
            response['results'].append(self.adapt_atlas_entity_to_navigator(atlas_entity))

      return response
    except RestException as e:
      if e.code == 401:
        raise raise_popup_exception('Hue could not authenticate to Atlas', detail=e)
      else:
        raise raise_popup_exception('Hue could not query Atlas', detail=e)

  # search_enties is only used by the table browser to fetch child entities of a given table or database.
  def search_entities(self, query_s, limit=100, offset=0, raw_query=False, **filters):
    try:
      found_entities = []

      search_terms = [term for term in query_s.strip().split()] if query_s else []
      parentPath = None
      for term in search_terms:
        if 'parentPath:' in term:
          name, val = term.split(':')
          # '/db/tbl' -> 'db.tbl' to build the Atlas qualifiedName prefix.
          parentPath = val.strip('"').lstrip('/').replace('/', '.')

      if query_s == 'type:database':
        if get_catalog_search_cluster():
          atlas_dsl_query = 'from hive_db where qualifiedName like \'*@%s\' limit %s' % (get_catalog_search_cluster(), limit)
        else:
          atlas_dsl_query = 'from hive_db limit %s' % limit
      elif not parentPath:
        return found_entities
      else:
        # One path component means children of a database (tables), more means columns.
        atlas_type = 'hive_table' if parentPath.count('.') == 0 else 'hive_column'
        if get_catalog_search_cluster():
          atlas_dsl_query = 'from %s where qualifiedName like \'%s*@%s\' limit %s' % (atlas_type, parentPath, get_catalog_search_cluster(), limit)
        else:
          atlas_dsl_query = 'from %s where qualifiedName like \'%s*\' limit %s' % (atlas_type, parentPath, limit)

      atlas_response = self._root.get('/v2/search/dsl?query=%s' % atlas_dsl_query)

      # Adapt Atlas entities to Navigator structure in the results
      if 'entities' in atlas_response:
        for atlas_entity in atlas_response['entities']:
          found_entities.append(self.adapt_atlas_entity_to_navigator(atlas_entity))

      return found_entities
    except RestException as e:
      if e.code == 401:
        raise raise_popup_exception('Hue could not authenticate to Atlas', detail=e)
      else:
        raise raise_popup_exception('Hue could not query Atlas', detail=e)

  def suggest(self, prefix=None):
    try:
      return self._root.get('interactive/suggestions?query=%s' % (prefix or '*'))
    except RestException as e:
      if e.code == 401:
        raise raise_popup_exception('Hue could not authenticate to Atlas', detail=e)
      else:
        raise raise_popup_exception('Failed to search for entities', detail=e)

  def get_entity(self, entity_id):
    """
    # TODO: get entity by Atlas __guid or qualifiedName
    GET /v2/search/dsl?query=?
    """
    try:
      return self._root.get('entities/%s' % entity_id, headers=self.__headers, params=self.__params)
    except RestException as e:
      msg = 'Failed to get entity %s: %s' % (entity_id, str(e))
      LOG.error(msg)
      raise CatalogApiException(e.message)

  def update_entity(self, entity, **metadata):
    """
    PUT /api/v3/entities/:id
    http://cloudera.github.io/navigator/apidocs/v3/path__v3_entities_-id-.html
    """
    try:
      # Workarounds NAV-6187: if we don't re-send those, they would get erased.
      properties = {
        'name': entity['name'],
        'description': entity['description'],
        'properties': entity['properties'] or {},
        'customProperties': entity['customProperties'] or {}
      }
      properties.update(metadata)
      data = json.dumps(properties)
      return self._root.put('entities/%(identity)s' % entity, params=self.__params, data=data, contenttype=_JSON_CONTENT_TYPE, allow_redirects=True, clear_cookies=True)
    except RestException as e:
      if e.code == 401:
        raise raise_popup_exception('Hue could not authenticate to Atlas', detail=e)
      else:
        raise raise_popup_exception('Failed to update entity', detail=e)

  def get_cluster_source_ids(self):
    return []
    # params = (
    #   ('query', 'clusterName:"%s"' % get_navigator_hue_server_name()),
    #   ('limit', 200),
    # )
    # LOG.info(params)
    # return self._root.get('entities', headers=self.__headers, params=params)

  def add_tags(self, entity_id, tags):
    """Append `tags` to the entity's existing tag list."""
    entity = self.get_entity(entity_id)
    new_tags = entity['tags'] or []
    new_tags.extend(tags)
    return self.update_entity(entity, tags=new_tags)

  def delete_tags(self, entity_id, tags):
    """Remove each of `tags` from the entity's tag list when present."""
    entity = self.get_entity(entity_id)
    new_tags = entity['tags'] or []
    for tag in tags:
      if tag in new_tags:
        new_tags.remove(tag)
    return self.update_entity(entity, tags=new_tags)

  def update_properties(self, entity_id, properties, modified_custom_metadata=None, deleted_custom_metadata_keys=None):
    """Merge custom-metadata additions/deletions into the entity's properties and persist."""
    entity = self.get_entity(entity_id)

    if modified_custom_metadata:
      properties['properties'] = entity['properties'] or {}
      properties['properties'].update(modified_custom_metadata)
    if deleted_custom_metadata_keys:
      properties['properties'] = entity['properties'] or {}
      for key in deleted_custom_metadata_keys:
        if key in properties['properties']:
          del properties['properties'][key]
    return self.update_entity(entity, **properties)

  def delete_metadata_properties(self, entity_id, property_keys):
    entity = self.get_entity(entity_id)
    new_props = entity['properties'] or {}
    for key in property_keys:
      if key in new_props:
        del new_props[key]
    return self.update_entity(entity, properties=new_props)

  def get_lineage(self, entity_id):
    """
    GET /api/v3/lineage/entityIds=:id
    http://cloudera.github.io/navigator/apidocs/v3/path__v3_lineage.html
    """
    try:
      params = self.__params
      params += (('entityIds', entity_id), )
      return self._root.get('lineage', headers=self.__headers, params=params)
    except RestException as e:
      if e.code == 401:
        raise raise_popup_exception('Hue could not authenticate to Atlas', detail=e)
      else:
        raise raise_popup_exception('Failed to get lineage', detail=e)

  def create_namespace(self, namespace, description=None):
    try:
      data = json.dumps({'name': namespace, 'description': description})
      return self._root.post('models/namespaces/', data=data, contenttype=_JSON_CONTENT_TYPE, clear_cookies=True)
    except RestException as e:
      if e.code == 401:
        raise raise_popup_exception('Hue could not authenticate to Atlas', detail=e)
      else:
        raise raise_popup_exception('Failed to create namespace', detail=e)

  def get_namespace(self, namespace):
    try:
      return self._root.get('models/namespaces/%(namespace)s' % {'namespace': namespace})
    except RestException as e:
      if e.code == 401:
        raise raise_popup_exception('Hue could not authenticate to Atlas', detail=e)
      else:
        raise raise_popup_exception('Failed to get namespace', detail=e)

  def create_namespace_property(self, namespace, properties):
    try:
      data = json.dumps(properties)
      return self._root.post('models/namespaces/%(namespace)s/properties' % {'namespace': namespace}, data=data, contenttype=_JSON_CONTENT_TYPE, clear_cookies=True)
    except RestException as e:
      if e.code == 401:
        raise raise_popup_exception('Hue could not authenticate to Atlas', detail=e)
      else:
        raise raise_popup_exception('Failed to create namespace', detail=e)

  def get_namespace_properties(self, namespace):
    try:
      return self._root.get('models/namespaces/%(namespace)s/properties' % {'namespace': namespace})
    except RestException as e:
      if e.code == 401:
        raise raise_popup_exception('Hue could not authenticate to Atlas', detail=e)
      else:
        # NOTE(review): error message says 'create' but this is a read — looks copy/pasted.
        raise raise_popup_exception('Failed to create namespace', detail=e)

  def map_namespace_property(self, clazz, properties):
    try:
      data = json.dumps(properties)
      return self._root.post('models/packages/nav/classes/%(class)s/properties' % {'class': clazz}, data=data, contenttype=_JSON_CONTENT_TYPE, clear_cookies=True)
    except RestException as e:
      if e.code == 401:
        raise raise_popup_exception('Hue could not authenticate to Atlas', detail=e)
      else:
        raise raise_popup_exception('Failed to map class', detail=e)

  def get_model_properties_mapping(self):
    try:
      return self._root.get('models/properties/mappings')
    except RestException as e:
      if e.code == 401:
        raise raise_popup_exception('Hue could not authenticate to Atlas', detail=e)
      else:
        raise raise_popup_exception('Failed to get models properties mappings', detail=e)

  def _fillup_properties(self):
    """One-time setup of the Hue catalog namespace and its properties (memoized globally)."""
    global _HAS_CATALOG_NAMESPACE

    if _HAS_CATALOG_NAMESPACE is None:
      response = self.get_namespace(namespace=AtlasApi.CATALOG_NAMESPACE)
      if not response:
        self.create_namespace(namespace=AtlasApi.CATALOG_NAMESPACE, description="Set of fields to augment the data catalog")

      properties = self.get_namespace_properties(namespace=AtlasApi.CATALOG_NAMESPACE)

      if not [_property for _property in properties if _property['name'] == 'relatedDocuments']:
        self.create_namespace_property(namespace=AtlasApi.CATALOG_NAMESPACE, properties={
          "name": "relatedDocuments",
          "displayName": "Related documents",
          "description": "List of Hue document UUIDs related to this entity",
          "multiValued": True,
          "maxLength": 36,
          "pattern": ".*",  # UUID
          "enumValues": None,
          "type": "TEXT"
        })

        # Might want to check if the mapping is already done
        for clazz in ('hv_table', 'hv_view'):
          self.map_namespace_property(clazz, properties=[{
            "namespace": AtlasApi.CATALOG_NAMESPACE,
            "name": "relatedDocuments"
          }])

      _HAS_CATALOG_NAMESPACE = True

  def _get_boosted_term(self, term):
    """Build a boosted search expression over DEFAULT_SEARCH_FIELDS for `term`."""
    return 'AND'.join([
      '(%s)' % 'OR'.join(['(%s:%s*^%s)' % (field, term, weight) for (field, weight) in AtlasApi.DEFAULT_SEARCH_FIELDS]),  # Matching fields
      '(%s)' % 'OR'.join(['(%s:[* TO *])' % field for (field, weight) in AtlasApi.DEFAULT_SEARCH_FIELDS])  # Boost entities with enriched fields
      # Could add certain customProperties and properties
    ])

  def _clean_path(self, path):
    # Returns (basename, escaped full path without trailing slash).
    return path.rstrip('/').split('/')[-1], self._escape_slashes(path.rstrip('/'))

  def _escape_slashes(self, s):
    return s.replace('/', '\/')
class OozieApi(object):
  """
  Client for the Oozie web services REST API.

  Wraps the jobs listing, job control, submission and admin endpoints and
  returns the Hue model objects (Workflow, Coordinator, Bundle, ...).
  """

  def __init__(self, oozie_url, user, security_enabled=False, api_version=API_VERSION, ssl_cert_ca_verify=True):
    self._url = posixpath.join(oozie_url, api_version)
    self._client = HttpClient(self._url, logger=LOG)

    if security_enabled:
      self._client.set_kerberos_auth()
    self._client.set_verify(ssl_cert_ca_verify)

    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    # To store username info: accept either a User-like object or a plain name.
    if hasattr(user, 'username'):
      self.user = user.username
    else:
      self.user = user
    self.api_version = api_version

  def __str__(self):
    return "OozieApi at %s" % (self._url, )

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def _get_params(self):
    """Common query parameters: impersonation (doAs) and timezone; user.name only when unsecured."""
    if self.security_enabled:
      return {'doAs': self.user, 'timezone': TIME_ZONE.get()}
    return {'user.name': DEFAULT_USER, 'doAs': self.user, 'timezone': TIME_ZONE.get()}

  def _get_oozie_properties(self, properties=None):
    """Default Oozie job configuration (user.name), overridable via `properties`."""
    defaults = {
      'user.name': self.user,
    }
    if properties is not None:
      defaults.update(properties)
    return defaults

  VALID_JOB_FILTERS = ('name', 'user', 'group', 'status', 'startcreatedtime', 'text')
  VALID_LOG_FILTERS = set(('recent', 'limit', 'loglevel', 'text'))

  def get_jobs(self, jobtype, offset=None, cnt=None, filters=None):
    """
    Get a list of Oozie jobs. Note that offset is 1-based.

    `filters` is an iterable of (key, value) pairs; keys must be one of
    VALID_JOB_FILTERS: name, user, group, status, startcreatedtime, text.
    Raises ValueError on an invalid filter key.
    """
    params = self._get_params()
    if offset is not None:
      params['offset'] = str(offset)
    if cnt is not None:
      params['len'] = str(cnt)
    if filters is None:
      filters = []
    params['jobtype'] = jobtype

    filter_list = []
    for key, val in filters:
      if key not in OozieApi.VALID_JOB_FILTERS:
        raise ValueError('"%s" is not a valid filter for selecting jobs' % (key, ))
      filter_list.append('%s=%s' % (key, val))
    params['filter'] = ';'.join(filter_list)

    # Send the request
    resp = self._root.get('jobs', params)
    if jobtype == 'wf':
      wf_list = WorkflowList(self, resp, filters=filters)
    elif jobtype == 'coord':
      wf_list = CoordinatorList(self, resp, filters=filters)
    else:
      wf_list = BundleList(self, resp, filters=filters)
    return wf_list

  def get_workflows(self, offset=None, cnt=None, filters=None):
    return self.get_jobs('wf', offset, cnt, filters)

  def get_coordinators(self, offset=None, cnt=None, filters=None):
    return self.get_jobs('coord', offset, cnt, filters)

  def get_bundles(self, offset=None, cnt=None, filters=None):
    return self.get_jobs('bundle', offset, cnt, filters)

  # TODO: make get_job accept any jobid
  def get_job(self, jobid):
    """
    get_job(jobid) -> Workflow
    """
    params = self._get_params()
    resp = self._root.get('job/%s' % (jobid, ), params)
    wf = Workflow(self, resp)
    return wf

  def get_coordinator(self, jobid, offset=None, cnt=None, filters=None):
    """Fetch one Coordinator, with its actions ordered descending."""
    params = self._get_params()
    if offset is not None:
      params['offset'] = str(offset)
    if cnt is not None:
      params['len'] = str(cnt)
    if filters is None:
      filters = {}
    params.update({'order': 'desc'})

    filter_list = []
    # NOTE(review): callers are expected to pass (key, value) pairs here, as in
    # get_jobs; the `{}` default only works because an empty dict iterates to nothing.
    for key, val in filters:
      if key not in OozieApi.VALID_JOB_FILTERS:
        raise ValueError('"%s" is not a valid filter for selecting jobs' % (key, ))
      filter_list.append('%s=%s' % (key, val))
    params['filter'] = ';'.join(filter_list)

    resp = self._root.get('job/%s' % (jobid, ), params)
    return Coordinator(self, resp)

  def get_bundle(self, jobid):
    params = self._get_params()
    resp = self._root.get('job/%s' % (jobid, ), params)
    return Bundle(self, resp)

  def get_job_definition(self, jobid):
    """
    get_job_definition(jobid) -> Definition (xml string)
    """
    params = self._get_params()
    params['show'] = 'definition'
    return self._root.get('job/%s' % (jobid, ), params)

  def get_job_log(self, jobid, logfilter=None):
    """
    get_job_log(jobid) -> Log (xml string)

    `logfilter` is an iterable of (key, value) pairs; keys must be in
    VALID_LOG_FILTERS. Raises ValueError on an invalid key.
    """
    params = self._get_params()
    params['show'] = 'log'

    filter_list = []
    if logfilter is None:
      logfilter = []
    for key, val in logfilter:
      if key not in OozieApi.VALID_LOG_FILTERS:
        raise ValueError('"%s" is not a valid filter for job logs' % (key, ))
      filter_list.append('%s=%s' % (key, val))
    params['logfilter'] = ';'.join(filter_list)
    return self._root.get('job/%s' % (jobid, ), params)

  def get_job_status(self, jobid):
    params = self._get_params()
    params['show'] = 'status'

    xml = self._root.get('job/%s' % (jobid, ), params)
    return xml

  def get_action(self, action_id):
    """Fetch a single action; the id prefix ('C@'/'B@') picks the model class."""
    if 'C@' in action_id:
      Klass = CoordinatorAction
    elif 'B@' in action_id:
      Klass = BundleAction
    else:
      Klass = WorkflowAction
    params = self._get_params()
    resp = self._root.get('job/%s' % (action_id, ), params)
    return Klass(resp)

  def job_control(self, jobid, action, properties=None, parameters=None):
    """
    job_control(jobid, action) -> None
    Raise RestException on error; ValueError on an unknown action.
    """
    if action not in ('start', 'suspend', 'resume', 'kill', 'rerun', 'coord-rerun', 'bundle-rerun', 'change', 'ignore', 'update'):
      msg = 'Invalid oozie job action: %s' % (action, )
      LOG.error(msg)
      raise ValueError(msg)

    properties = self._get_oozie_properties(properties)
    params = self._get_params()
    params['action'] = action
    if parameters is not None:
      params.update(parameters)

    return self._root.put('job/%s' % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def submit_workflow(self, application_path, properties=None):
    """
    submit_workflow(application_path, properties=None) -> jobid

    Raise RestException on error.
    """
    defaults = {
      'oozie.wf.application.path': application_path,
      'user.name': self.user,
    }

    if properties is not None:
      defaults.update(properties)
    properties = defaults

    return self.submit_job(properties)

  # Is name actually submit_coord?
  def submit_job(self, properties=None):
    """
    submit_job(properties=None, id=None) -> jobid

    Raise RestException on error.
    """
    defaults = {
      'user.name': self.user,
    }

    if properties is not None:
      defaults.update(properties)
    properties = defaults

    params = self._get_params()
    resp = self._root.post('jobs', params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)
    return resp['id']

  def dryrun(self, properties=None):
    defaults = {
      'user.name': self.user,
    }
    if properties is not None:
      defaults.update(properties)
    properties = defaults

    params = self._get_params()
    params['action'] = 'dryrun'
    return self._root.post('jobs', params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def rerun(self, jobid, properties=None, params=None):
    """Rerun a job; caller-supplied `params` override the default request params."""
    properties = self._get_oozie_properties(properties)
    # Bug fix: the old code did `self._get_params().update(params)`, which
    # updated a throwaway dict — the default doAs/timezone/user.name params
    # were silently dropped whenever `params` was provided. Merge properly.
    base_params = self._get_params()
    if params is not None:
      base_params.update(params)
    params = base_params
    params['action'] = 'rerun'

    return self._root.put('job/%s' % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def get_build_version(self):
    """
    get_build_version() -> Build version (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/build-version', params)
    return resp

  def get_instrumentation(self):
    params = self._get_params()
    resp = self._root.get('admin/instrumentation', params)
    return resp

  def get_metrics(self):
    params = self._get_params()
    resp = self._root.get('admin/metrics', params)
    return resp

  def get_configuration(self):
    """
    get_configuration() -> Oozie config (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/configuration', params)
    return resp

  def get_oozie_status(self):
    """
    get_oozie_status() -> Oozie status (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/status', params)
    return resp

  def get_oozie_slas(self, **kwargs):
    """
    filter=
      app_name=my-sla-app
      id=0000002-131206135002457-oozie-oozi-W
      nominal_start=2013-06-18T00:01Z
      nominal_end=2013-06-23T00:01Z
    """
    params = self._get_params()
    params['filter'] = ';'.join(['%s=%s' % (key, val) for key, val in kwargs.iteritems()])
    resp = self._root.get('sla', params)
    return resp['slaSummaryList']
class ResourceManagerApi(object):
  """
  Client for the YARN ResourceManager REST API (ws/<version>).

  The effective user is stored per-thread (setuser/username), so one shared
  instance can serve concurrent requests with different impersonated users.
  """

  def __init__(self, rm_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(rm_url, 'ws', _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    self._thread_local = threading.local()  # To store user info

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def _get_params(self):
    """Build doAs/user.name params when impersonating someone other than the default user."""
    params = {}

    if self.username != DEFAULT_USER.get():  # We impersonate if needed
      params['doAs'] = self.username
      if not self.security_enabled:
        params['user.name'] = DEFAULT_USER.get()

    return params

  def __str__(self):
    return "ResourceManagerApi at %s" % (self._url,)

  def setuser(self, user):
    """Set the effective user for the current thread; return the previous one."""
    curr = self.user
    self._thread_local.user = user
    return curr

  @property
  def user(self):
    return self.username  # Backward compatibility

  @property
  def username(self):
    # Falls back to the configured default user when no thread-local user is set.
    try:
      return self._thread_local.user
    except AttributeError:
      return DEFAULT_USER.get()

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def cluster(self, **kwargs):
    # kwargs accepted for interface compatibility but not forwarded to the request.
    params = self._get_params()
    return self._execute(self._root.get, 'cluster/info', params=params, headers={'Accept': _JSON_CONTENT_TYPE})

  def apps(self, **kwargs):
    params = self._get_params()
    params.update(kwargs)
    return self._execute(self._root.get, 'cluster/apps', params=params, headers={'Accept': _JSON_CONTENT_TYPE})

  def app(self, app_id):
    params = self._get_params()
    return self._execute(self._root.get, 'cluster/apps/%(app_id)s' % {'app_id': app_id}, params=params, headers={'Accept': _JSON_CONTENT_TYPE})

  def kill(self, app_id):
    """Request that YARN move the application to the KILLED state."""
    data = {'state': 'KILLED'}
    token = None

    # Tokens are managed within the kill method but should be moved out when
    # not alpha anymore or we support submitting an app.
    # NOTE: the `and False` keeps the delegation-token flow deliberately disabled.
    if self.security_enabled and False:
      full_token = self.delegation_token()
      if 'token' not in full_token:
        raise PopupException(_('YARN did not return any token field.'), detail=smart_str(full_token))
      data['X-Hadoop-Delegation-Token'] = token = full_token.pop('token')
      LOG.debug('Received delegation token %s' % full_token)

    try:
      params = self._get_params()
      return self._execute(self._root.put, 'cluster/apps/%(app_id)s/state' % {'app_id': app_id}, params=params, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE)
    finally:
      if token:
        self.cancel_token(token)

  def delegation_token(self):
    params = self._get_params()
    data = {'renewer': self.username}
    return self._execute(self._root.post, 'cluster/delegation-token', params=params, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE)

  def cancel_token(self, token):
    params = self._get_params()
    headers = {'Hadoop-YARN-RM-Delegation-Token': token}
    # Bug fix: the format string was missing its %s placeholder, so the `%`
    # operator raised TypeError instead of logging the message.
    LOG.debug('Canceling delegation token of %s' % self.username)
    return self._execute(self._root.delete, 'cluster/delegation-token', params=params, headers=headers)

  def _execute(self, function, *args, **kwargs):
    response = function(*args, **kwargs)

    # YARN-2605: Yarn does not use proper HTTP redirects when the standby RM has
    # failed back to the master RM.
    if isinstance(response, str) and response.startswith('This is standby RM. Redirecting to the current active RM'):
      raise YarnFailoverOccurred(response)

    return response
class SolrApi(object):
    """
    http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler

    Builds and executes dashboard `select`/`suggest` requests against Solr:
    main query, filter queries (fq), classic facet params and JSON facets.
    """

    def __init__(
        self, solr_url, user, security_enabled=SECURITY_ENABLED.get(), ssl_cert_ca_verify=SSL_CERT_CA_VERIFY.get()
    ):
        self._url = solr_url
        self._user = user
        self._client = HttpClient(self._url, logger=LOG)
        self.security_enabled = security_enabled

        if self.security_enabled:
            self._client.set_kerberos_auth()

        self._client.set_verify(ssl_cert_ca_verify)

        self._root = resource.Resource(self._client)

        # The Kerberos handshake requires two requests in order to authenticate,
        # but if our first request is a PUT/POST, it might flat-out reject the
        # first request if the body is too large. So, connect here in order to get
        # a cookie so future PUT/POSTs will be pre-authenticated.
        if self.security_enabled:
            self._root.invoke("HEAD", "/")

    def _get_params(self):
        # Impersonate the end user; without Kerberos we additionally
        # authenticate as the default user via user.name.
        if self.security_enabled:
            return (("doAs", self._user),)
        return (("user.name", DEFAULT_USER), ("doAs", self._user))

    def _get_q(self, query):
        """Join all sub-queries into one Solr q string (utf-8 encoded)."""
        # NOTE(review): sub-queries are joined with a bare 'OR' (no spaces),
        # producing e.g. '(a)OR(b)' — confirm this is intentional.
        q_template = "(%s)" if len(query["qs"]) >= 2 else "%s"
        return "OR".join([q_template % (q["q"] or EMPTY_QUERY.get()) for q in query["qs"]]).encode("utf-8")

    def _get_aggregate_function(self, facet):
        """Return the Solr aggregate expression (e.g. 'sum(field)') for a facet."""
        props = {
            "field": facet["field"],
            "aggregate": facet["properties"]["aggregate"] if "properties" in facet else facet["aggregate"],
        }
        # 'median' has no direct Solr function: use the 50th percentile instead.
        if props["aggregate"] == "median":
            return "percentile(%(field)s,50)" % props
        else:
            return "%(aggregate)s(%(field)s)" % props

    def _get_range_borders(self, collection, query):
        """Compute from/to/gap properties of the collection's time filter, if any."""
        props = {}
        # Gap per rolling-window size and widget type, chosen to yield a
        # manageable number of buckets per widget.
        GAPS = {
            "5MINUTES": {
                "histogram-widget": {"coeff": "+3", "unit": "SECONDS"},  # ~100 slots
                "bucket-widget": {"coeff": "+3", "unit": "SECONDS"},  # ~100 slots
                "bar-widget": {"coeff": "+3", "unit": "SECONDS"},  # ~100 slots
                "facet-widget": {"coeff": "+1", "unit": "MINUTES"},  # ~10 slots
            },
            "30MINUTES": {
                "histogram-widget": {"coeff": "+20", "unit": "SECONDS"},
                "bucket-widget": {"coeff": "+20", "unit": "SECONDS"},
                "bar-widget": {"coeff": "+20", "unit": "SECONDS"},
                "facet-widget": {"coeff": "+5", "unit": "MINUTES"},
            },
            "1HOURS": {
                "histogram-widget": {"coeff": "+30", "unit": "SECONDS"},
                "bucket-widget": {"coeff": "+30", "unit": "SECONDS"},
                "bar-widget": {"coeff": "+30", "unit": "SECONDS"},
                "facet-widget": {"coeff": "+10", "unit": "MINUTES"},
            },
            "12HOURS": {
                "histogram-widget": {"coeff": "+7", "unit": "MINUTES"},
                "bucket-widget": {"coeff": "+7", "unit": "MINUTES"},
                "bar-widget": {"coeff": "+7", "unit": "MINUTES"},
                "facet-widget": {"coeff": "+1", "unit": "HOURS"},
            },
            "1DAYS": {
                "histogram-widget": {"coeff": "+15", "unit": "MINUTES"},
                "bucket-widget": {"coeff": "+15", "unit": "MINUTES"},
                "bar-widget": {"coeff": "+15", "unit": "MINUTES"},
                "facet-widget": {"coeff": "+3", "unit": "HOURS"},
            },
            "2DAYS": {
                "histogram-widget": {"coeff": "+30", "unit": "MINUTES"},
                "bucket-widget": {"coeff": "+30", "unit": "MINUTES"},
                "bar-widget": {"coeff": "+30", "unit": "MINUTES"},
                "facet-widget": {"coeff": "+6", "unit": "HOURS"},
            },
            "7DAYS": {
                "histogram-widget": {"coeff": "+3", "unit": "HOURS"},
                "bucket-widget": {"coeff": "+3", "unit": "HOURS"},
                "bar-widget": {"coeff": "+3", "unit": "HOURS"},
                "facet-widget": {"coeff": "+1", "unit": "DAYS"},
            },
            "1MONTHS": {
                "histogram-widget": {"coeff": "+12", "unit": "HOURS"},
                "bucket-widget": {"coeff": "+12", "unit": "HOURS"},
                "bar-widget": {"coeff": "+12", "unit": "HOURS"},
                "facet-widget": {"coeff": "+5", "unit": "DAYS"},
            },
            "3MONTHS": {
                "histogram-widget": {"coeff": "+1", "unit": "DAYS"},
                "bucket-widget": {"coeff": "+1", "unit": "DAYS"},
                "bar-widget": {"coeff": "+1", "unit": "DAYS"},
                "facet-widget": {"coeff": "+30", "unit": "DAYS"},
            },
            "1YEARS": {
                "histogram-widget": {"coeff": "+3", "unit": "DAYS"},
                "bucket-widget": {"coeff": "+3", "unit": "DAYS"},
                "bar-widget": {"coeff": "+3", "unit": "DAYS"},
                "facet-widget": {"coeff": "+12", "unit": "MONTHS"},
            },
            "2YEARS": {
                "histogram-widget": {"coeff": "+7", "unit": "DAYS"},
                "bucket-widget": {"coeff": "+7", "unit": "DAYS"},
                "bar-widget": {"coeff": "+7", "unit": "DAYS"},
                "facet-widget": {"coeff": "+3", "unit": "MONTHS"},
            },
            "10YEARS": {
                "histogram-widget": {"coeff": "+1", "unit": "MONTHS"},
                "bucket-widget": {"coeff": "+1", "unit": "MONTHS"},
                "bar-widget": {"coeff": "+1", "unit": "MONTHS"},
                "facet-widget": {"coeff": "+1", "unit": "YEARS"},
            },
        }

        time_field = collection["timeFilter"].get("field")

        if time_field and (collection["timeFilter"]["value"] != "all" or collection["timeFilter"]["type"] == "fixed"):
            # fqs overrides main time filter
            fq_time_ids = [fq["id"] for fq in query["fqs"] if fq["field"] == time_field]
            props["time_filter_overrides"] = fq_time_ids
            props["time_field"] = time_field

            if collection["timeFilter"]["type"] == "rolling":
                props["field"] = collection["timeFilter"]["field"]
                props["from"] = "NOW-%s" % collection["timeFilter"]["value"]
                props["to"] = "NOW"
                props["gap"] = GAPS.get(collection["timeFilter"]["value"])
            elif collection["timeFilter"]["type"] == "fixed":
                props["field"] = collection["timeFilter"]["field"]
                props["from"] = collection["timeFilter"]["from"]
                props["to"] = collection["timeFilter"]["to"]
                props["fixed"] = True

        return props

    def _get_time_filter_query(self, timeFilter, facet):
        """Return start/end/gap range-facet keys for the facet's widget type."""
        if "fixed" in timeFilter:
            # Fixed window: derive the gap from the window size.
            props = {}
            stat_facet = {"min": timeFilter["from"], "max": timeFilter["to"]}
            _compute_range_facet(facet["widgetType"], stat_facet, props, stat_facet["min"], stat_facet["max"])
            gap = props["gap"]
            # Strip the leading number from e.g. '+3HOURS' to get the rounding unit.
            unit = re.split("\d+", gap)[1]
            return {
                "start": "%(from)s/%(unit)s" % {"from": timeFilter["from"], "unit": unit},
                "end": "%(to)s/%(unit)s" % {"to": timeFilter["to"], "unit": unit},
                "gap": "%(gap)s" % props,  # add a 'auto'
            }
        else:
            # Rolling window: gap comes from the GAPS table per widget type.
            gap = timeFilter["gap"][facet["widgetType"]]
            return {
                "start": "%(from)s/%(unit)s" % {"from": timeFilter["from"], "unit": gap["unit"]},
                "end": "%(to)s/%(unit)s" % {"to": timeFilter["to"], "unit": gap["unit"]},
                "gap": "%(coeff)s%(unit)s/%(unit)s" % gap,  # add a 'auto'
            }

    def _get_fq(self, collection, query):
        """Translate the query's filter definitions into Solr fq parameters."""
        params = ()
        timeFilter = {}

        if collection:
            timeFilter = self._get_range_borders(collection, query)
        if timeFilter and not timeFilter.get("time_filter_overrides"):
            params += (("fq", urllib.unquote(utf_quoter("%(field)s:[%(from)s TO %(to)s]" % timeFilter))),)

        # Merge facets queries on same fields
        grouped_fqs = groupby(query["fqs"], lambda x: (x["type"], x["field"]))
        merged_fqs = []
        for key, group in grouped_fqs:
            field_fq = next(group)
            for fq in group:
                for f in fq["filter"]:
                    field_fq["filter"].append(f)
            merged_fqs.append(field_fq)

        for fq in merged_fqs:
            if fq["type"] == "field":
                fields = fq["field"] if type(fq["field"]) == list else [fq["field"]]  # 2D facets support
                for field in fields:
                    f = []
                    for _filter in fq["filter"]:
                        values = (
                            _filter["value"] if type(_filter["value"]) == list else [_filter["value"]]
                        )  # 2D facets support
                        if fields.index(field) < len(values):  # Lowest common field denominator
                            value = values[fields.index(field)]
                            exclude = "-" if _filter["exclude"] else ""
                            # Quote values containing spaces; otherwise use the {!field} parser.
                            if value is not None and " " in force_unicode(value):
                                value = force_unicode(value).replace('"', '\\"')
                                f.append('%s%s:"%s"' % (exclude, field, value))
                            else:
                                f.append("%s{!field f=%s}%s" % (exclude, field, value))
                    _params = "{!tag=%(id)s}" % fq + " ".join(f)
                    params += (("fq", urllib.unquote(utf_quoter(_params))),)
            elif fq["type"] == "range":
                params += (
                    (
                        "fq",
                        "{!tag=%(id)s}" % fq
                        + " ".join(
                            [
                                urllib.unquote(
                                    utf_quoter(
                                        "%s%s:[%s TO %s}"
                                        % ("-" if field["exclude"] else "", fq["field"], f["from"], f["to"])
                                    )
                                )
                                for field, f in zip(fq["filter"], fq["properties"])
                            ]
                        ),
                    ),
                )
            elif fq["type"] == "range-up":
                # Open-ended range: direction depends on fq['is_up'].
                params += (
                    (
                        "fq",
                        "{!tag=%(id)s}" % fq
                        + " ".join(
                            [
                                urllib.unquote(
                                    utf_quoter(
                                        "%s%s:[%s TO %s}"
                                        % (
                                            "-" if field["exclude"] else "",
                                            fq["field"],
                                            f["from"] if fq["is_up"] else "*",
                                            "*" if fq["is_up"] else f["from"],
                                        )
                                    )
                                )
                                for field, f in zip(fq["filter"], fq["properties"])
                            ]
                        ),
                    ),
                )
            elif fq["type"] == "map":
                # Bounding-box filter on latitude/longitude fields.
                _keys = fq.copy()
                _keys.update(fq["properties"])
                params += (
                    (
                        "fq",
                        "{!tag=%(id)s}" % fq
                        + urllib.unquote(
                            utf_quoter(
                                "%(lat)s:[%(lat_sw)s TO %(lat_ne)s} AND %(lon)s:[%(lon_sw)s TO %(lon_ne)s}" % _keys
                            )
                        ),
                    ),
                )

        return params

    def query(self, collection, query):
        """Run the dashboard query against `<collection>/select` and return parsed JSON."""
        solr_query = {}
        solr_query["collection"] = collection["name"]

        if query.get("download"):
            solr_query["rows"] = 1000
            solr_query["start"] = 0
        else:
            solr_query["rows"] = int(collection["template"]["rows"] or 10)
            solr_query["start"] = int(query["start"])

        # Hard caps on page size and offset.
        solr_query["rows"] = min(solr_query["rows"], 1000)
        solr_query["start"] = min(solr_query["start"], 10000)

        params = self._get_params() + (
            ("q", self._get_q(query)),
            ("wt", "json"),
            ("rows", solr_query["rows"]),
            ("start", solr_query["start"]),
        )

        if any(collection["facets"]):
            params += (("facet", "true"), ("facet.mincount", 0), ("facet.limit", 10))
            json_facets = {}

            timeFilter = self._get_range_borders(collection, query)

            for facet in collection["facets"]:
                if facet["type"] == "query":
                    params += (("facet.query", "%s" % facet["field"]),)
                elif facet["type"] == "range" or facet["type"] == "range-up":
                    keys = {
                        "id": "%(id)s" % facet,
                        "field": facet["field"],
                        "key": "%(field)s-%(id)s" % facet,
                        "start": facet["properties"]["start"],
                        "end": facet["properties"]["end"],
                        "gap": facet["properties"]["gap"],
                        "mincount": int(facet["properties"]["mincount"]),
                    }

                    if (
                        timeFilter
                        and timeFilter["time_field"] == facet["field"]
                        and (
                            facet["id"] not in timeFilter["time_filter_overrides"]
                            or facet["widgetType"] != "histogram-widget"
                        )
                    ):
                        keys.update(self._get_time_filter_query(timeFilter, facet))

                    params += (
                        (
                            "facet.range",
                            "{!key=%(key)s ex=%(id)s f.%(field)s.facet.range.start=%(start)s f.%(field)s.facet.range.end=%(end)s f.%(field)s.facet.range.gap=%(gap)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s"
                            % keys,
                        ),
                    )
                elif facet["type"] == "field":
                    keys = {
                        "id": "%(id)s" % facet,
                        "field": facet["field"],
                        "key": "%(field)s-%(id)s" % facet,
                        # facet-widget asks for one extra row to detect "more" results.
                        "limit": int(facet["properties"].get("limit", 10))
                        + (1 if facet["widgetType"] == "facet-widget" else 0),
                        "mincount": int(facet["properties"]["mincount"]),
                    }

                    params += (
                        (
                            "facet.field",
                            "{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s"
                            % keys,
                        ),
                    )
                elif facet["type"] == "nested":
                    _f = {
                        "field": facet["field"],
                        "limit": int(facet["properties"].get("limit", 10))
                        + (1 if facet["widgetType"] == "facet-widget" else 0),
                        "mincount": int(facet["properties"]["mincount"]),
                    }

                    if "start" in facet["properties"]:
                        _f.update(
                            {
                                "type": "range",
                                "start": facet["properties"]["start"],
                                "end": facet["properties"]["end"],
                                "gap": facet["properties"]["gap"],
                            }
                        )
                        if (
                            timeFilter
                            and timeFilter["time_field"] == facet["field"]
                            and (
                                facet["id"] not in timeFilter["time_filter_overrides"]
                                or facet["widgetType"] != "bucket-widget"
                            )
                        ):
                            _f.update(self._get_time_filter_query(timeFilter, facet))
                    else:
                        _f.update({"type": "terms", "field": facet["field"], "excludeTags": facet["id"]})

                    if facet["properties"]["facets"]:
                        if facet["properties"]["facets"][0]["aggregate"] == "count":
                            _f["facet"] = {
                                "d2": {
                                    "type": "terms",
                                    "field": "%(field)s" % facet["properties"]["facets"][0],
                                    "limit": int(facet["properties"]["facets"][0].get("limit", 10)),
                                    "mincount": int(facet["properties"]["facets"][0]["mincount"]),
                                }
                            }
                            if len(facet["properties"]["facets"]) > 1:  # Get 3rd dimension calculation
                                _f["facet"]["d2"]["facet"] = {
                                    "d2": self._get_aggregate_function(facet["properties"]["facets"][1])
                                }
                        else:
                            _f["facet"] = {"d2": self._get_aggregate_function(facet["properties"]["facets"][0])}

                    json_facets[facet["id"]] = _f
                elif facet["type"] == "function":
                    json_facets[facet["id"]] = self._get_aggregate_function(facet)
                    json_facets["processEmpty"] = True
                elif facet["type"] == "pivot":
                    if facet["properties"]["facets"] or facet["widgetType"] == "map-widget":
                        fields = facet["field"]
                        fields_limits = []
                        for f in facet["properties"]["facets"]:
                            fields_limits.append("f.%s.facet.limit=%s" % (f["field"], f["limit"]))
                            fields_limits.append("f.%s.facet.mincount=%s" % (f["field"], f["mincount"]))
                            fields += "," + f["field"]
                        keys = {
                            "id": "%(id)s" % facet,
                            "key": "%(field)s-%(id)s" % facet,
                            "field": facet["field"],
                            "fields": fields,
                            "limit": int(facet["properties"].get("limit", 10)),
                            "mincount": int(facet["properties"]["mincount"]),
                            "fields_limits": " ".join(fields_limits),
                        }
                        params += (
                            (
                                "facet.pivot",
                                "{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s %(fields_limits)s}%(fields)s"
                                % keys,
                            ),
                        )

            if json_facets:
                params += (("json.facet", json.dumps(json_facets)),)

        params += self._get_fq(collection, query)

        if collection["template"]["fieldsSelected"] and collection["template"]["isGridLayout"]:
            fields = set(
                collection["template"]["fieldsSelected"] + [collection["idField"]] if collection["idField"] else []
            )
            # Add field if needed
            if collection["template"]["leafletmap"].get("latitudeField"):
                fields.add(collection["template"]["leafletmap"]["latitudeField"])
            if collection["template"]["leafletmap"].get("longitudeField"):
                fields.add(collection["template"]["leafletmap"]["longitudeField"])
            if collection["template"]["leafletmap"].get("labelField"):
                fields.add(collection["template"]["leafletmap"]["labelField"])
            params += (("fl", urllib.unquote(utf_quoter(",".join(list(fields))))),)
        else:
            params += (("fl", "*"),)

        params += (("hl", "true"), ("hl.fl", "*"), ("hl.snippets", 5), ("hl.fragsize", 1000))

        if collection["template"]["fieldsSelected"]:
            fields = []
            for field in collection["template"]["fieldsSelected"]:
                # Py2 filter() returns a list; pick the matching attribute, if any.
                attribute_field = filter(
                    lambda attribute: field == attribute["name"], collection["template"]["fieldsAttributes"]
                )
                if attribute_field:
                    if attribute_field[0]["sort"]["direction"]:
                        fields.append("%s %s" % (field, attribute_field[0]["sort"]["direction"]))
            if fields:
                params += (("sort", ",".join(fields)),)

        response = self._root.get("%(collection)s/select" % solr_query, params)
        return self._get_json(response)

    def suggest(self, collection, query):
        """Query the collection's suggester with query['q'] (optionally a named dictionary)."""
        try:
            params = self._get_params() + (
                ("suggest", "true"),
                ("suggest.build", "true"),
                ("suggest.q", query["q"]),
                ("wt", "json"),
            )
            if query.get("dictionary"):
                params += (("suggest.dictionary", query["dictionary"]),)
            response = self._root.get("%s/suggest" % collection, params)
            return self._get_json(response)
        except RestException, e:
            raise PopupException(e, title=_("Error while accessing Solr"))
class SolrApi(object):
  """
  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler

  Builds and executes `select`/`suggest` requests for a collection:
  main query, filter queries and classic facet parameters.
  """

  def __init__(self, solr_url, user, security_enabled=SECURITY_ENABLED.get()):
    self._url = solr_url
    self._user = user
    self._client = HttpClient(self._url, logger=LOG)
    self.security_enabled = security_enabled

    if self.security_enabled:
      self._client.set_kerberos_auth()

    self._root = resource.Resource(self._client)

  def _get_params(self):
    # Impersonate the end user; without Kerberos we additionally
    # authenticate as the default user via user.name.
    if self.security_enabled:
      return (('doAs', self._user),)
    return (('user.name', DEFAULT_USER), ('doAs', self._user),)

  def _get_q(self, query):
    """Join all sub-queries into one Solr q string (utf-8 encoded)."""
    # NOTE(review): joined with a bare 'OR' (no spaces) — confirm intentional.
    q_template = '(%s)' if len(query['qs']) >= 2 else '%s'
    return 'OR'.join([q_template % (q['q'] or EMPTY_QUERY.get()) for q in query['qs']]).encode('utf-8')

  def _get_fq(self, query):
    """Translate the query's filter definitions into Solr fq parameters."""
    params = ()

    for fq in query['fqs']:
      if fq['type'] == 'field':
        # This does not work if spaces in Solr:
        # params += (('fq', ' '.join([urllib.unquote(utf_quoter('{!tag=%s}{!field f=%s}%s' % (fq['field'], fq['field'], _filter))) for _filter in fq['filter']])),)
        fields = fq['field'].split(':')  # 2D facets support
        for field in fields:
          f = []
          for _filter in fq['filter']:
            values = _filter['value'].split(':') if len(fields) > 1 else [_filter['value']]
            if fields.index(field) < len(values):  # Lowest common field denominator
              value = values[fields.index(field)]
              exclude = '-' if _filter['exclude'] else ''
              # Quote values containing spaces; otherwise use the {!field} parser.
              if value is not None and ' ' in value:
                f.append('%s%s:"%s"' % (exclude, field, value))
              else:
                f.append('%s{!field f=%s}%s' % (exclude, field, value))
          _params = '{!tag=%s}' % field + ' '.join(f)
          params += (('fq', urllib.unquote(utf_quoter(_params))),)
      elif fq['type'] == 'range':
        params += (('fq', '{!tag=%s}' % fq['field'] + ' '.join([urllib.unquote(
                    utf_quoter('%s%s:[%s TO %s}' % ('-' if field['exclude'] else '', fq['field'], f['from'], f['to'])))
                    for field, f in zip(fq['filter'], fq['properties'])])),)

    return params

  def query(self, collection, query):
    """Run the query against `<collection>/select` and return the parsed JSON response."""
    solr_query = {}
    solr_query['collection'] = collection['name']

    if query.get('download'):
      solr_query['rows'] = 1000
      solr_query['start'] = 0
    else:
      solr_query['rows'] = int(collection['template']['rows'] or 10)
      solr_query['start'] = int(query['start'])

    # Hard caps on page size and offset.
    solr_query['rows'] = min(solr_query['rows'], 1000)
    solr_query['start'] = min(solr_query['start'], 10000)

    params = self._get_params() + (
        ('q', self._get_q(query)),
        ('wt', 'json'),
        ('rows', solr_query['rows']),
        ('start', solr_query['start']),
    )

    if any(collection['facets']):
      params += (
        ('facet', 'true'),
        ('facet.mincount', 0),
        ('facet.limit', 10),
      )

      for facet in collection['facets']:
        if facet['type'] == 'query':
          params += (('facet.query', '%s' % facet['field']),)
        elif facet['type'] == 'range':
          params += tuple([
             ('facet.range', '{!ex=%s}%s' % (facet['field'], facet['field'])),
             ('f.%s.facet.range.start' % facet['field'], facet['properties']['start']),
             ('f.%s.facet.range.end' % facet['field'], facet['properties']['end']),
             ('f.%s.facet.range.gap' % facet['field'], facet['properties']['gap']),
             ('f.%s.facet.mincount' % facet['field'], facet['properties']['mincount']),]
          )
        elif facet['type'] == 'field':
          params += (
            ('facet.field', '{!ex=%s}%s' % (facet['field'], facet['field'])),
            # One extra row is requested to detect "more" results.
            ('f.%s.facet.limit' % facet['field'], int(facet['properties'].get('limit', 10)) + 1),
            ('f.%s.facet.mincount' % facet['field'], int(facet['properties']['mincount'])),
          )
        elif facet['type'] == 'pivot':
          if facet['properties']['facets']:
            fields = facet['field']
            for f in facet['properties']['facets']:
              params += (('f.%s.facet.limit' % f['field'], f['limit']),)
              fields += ',' + f['field']
            params += (
              ('facet.pivot', '{!ex=%s}%s' % (fields, fields)),
              ('f.%s.facet.limit' % facet['field'], int(facet['properties'].get('limit', 10))),
              ('facet.pivot.mincount', int(facet['properties']['mincount'])),
            )

    params += self._get_fq(query)

    if collection['template']['fieldsSelected'] and collection['template']['isGridLayout']:
      fields = set(collection['template']['fieldsSelected'] + [collection['idField']] if collection['idField'] else [])
      # Add field if needed
      if collection['template']['leafletmap'].get('latitudeField'):
        fields.add(collection['template']['leafletmap']['latitudeField'])
      if collection['template']['leafletmap'].get('longitudeField'):
        fields.add(collection['template']['leafletmap']['longitudeField'])
      if collection['template']['leafletmap'].get('labelField'):
        fields.add(collection['template']['leafletmap']['labelField'])
      params += (('fl', urllib.unquote(utf_quoter(','.join(list(fields))))),)
    else:
      params += (('fl', '*'),)

    params += (
      ('hl', 'true'),
      ('hl.fl', '*'),
      ('hl.snippets', 3),
      ('hl.fragsize', 0),
    )

    if collection['template']['fieldsSelected']:
      fields = []
      for field in collection['template']['fieldsSelected']:
        # Py2 filter() returns a list; pick the matching attribute, if any.
        attribute_field = filter(lambda attribute: field == attribute['name'], collection['template']['fieldsAttributes'])
        if attribute_field:
          if attribute_field[0]['sort']['direction']:
            fields.append('%s %s' % (field, attribute_field[0]['sort']['direction']))
      if fields:
        params += (
          ('sort', ','.join(fields)),
        )

    response = self._root.get('%(collection)s/select' % solr_query, params)
    return self._get_json(response)

  def suggest(self, solr_query, hue_core):
    """Query the collection's suggest handler; always return a parsed dict."""
    try:
      params = self._get_params() + (
          ('q', solr_query['q']),
          ('wt', 'json'),
      )
      response = self._root.get('%(collection)s/suggest' % solr_query, params)
      # The response may come back as a raw JSON string depending on transport.
      if type(response) != dict:
        response = json.loads(response)
      return response
    except RestException, e:
      raise PopupException(e, title=_('Error while accessing Solr'))
class MapreduceApi(object):
  """
  Thin REST wrapper around the YARN proxy endpoint for MapReduce job data
  (``<mr_url>/proxy/<app_id>/ws/<version>/mapreduce/...``).

  The effective user is kept per-thread (see `setuser`) so a shared instance
  can impersonate different request users.
  """

  def __init__(self, mr_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(mr_url, 'proxy')
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    # Per-thread storage for the effective (impersonated) user.
    self._thread_local = threading.local()

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "MapreduceApi at %s" % (self._url,)

  def _get_params(self):
    """Common doAs/user.name impersonation query parameters."""
    params = {}
    if self.username != DEFAULT_USER.get():  # We impersonate if needed
      params['doAs'] = self.username
      if not self._security_enabled:
        params['user.name'] = DEFAULT_USER.get()
    return params

  @property
  def url(self):
    return self._url

  @property
  def username(self):
    # Fall back to the default user when none was set on this thread.
    if hasattr(self._thread_local, 'user'):
      return self._thread_local.user
    return DEFAULT_USER.get()

  def setuser(self, user):
    """Set the effective user for this thread; return the previous one."""
    previous = self.username
    self._thread_local.user = user
    return previous

  def job(self, user, job_id):
    # `user` is unused but kept for interface compatibility with callers.
    keys = {'app_id': job_id.replace('job', 'application'), 'job_id': job_id, 'version': _API_VERSION}
    return self._root.get(
        '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s' % keys,
        params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def counters(self, job_id):
    keys = {'app_id': job_id.replace('job', 'application'), 'job_id': job_id, 'version': _API_VERSION}
    response = self._root.get(
        '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/counters' % keys,
        params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})
    # If it hits the job history server, it will return HTML.
    # Simply return None in this case because there isn't much data there.
    return None if isinstance(response, basestring) else response

  def tasks(self, job_id):
    keys = {'app_id': job_id.replace('job', 'application'), 'job_id': job_id, 'version': _API_VERSION}
    return self._root.get(
        '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks' % keys,
        params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def job_attempts(self, job_id):
    keys = {'app_id': job_id.replace('job', 'application'), 'job_id': job_id, 'version': _API_VERSION}
    return self._root.get(
        '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/jobattempts' % keys,
        params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def conf(self, job_id):
    keys = {'app_id': job_id.replace('job', 'application'), 'job_id': job_id, 'version': _API_VERSION}
    return self._root.get(
        '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/conf' % keys,
        params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task(self, job_id, task_id):
    keys = {
        'app_id': job_id.replace('job', 'application'), 'job_id': job_id,
        'task_id': task_id, 'version': _API_VERSION
    }
    return self._root.get(
        '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s' % keys,
        params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_counters(self, job_id, task_id):
    # Callers may pass an application id here, hence the normalization to a job id.
    keys = {
        'app_id': job_id.replace('job', 'application'), 'job_id': job_id.replace('application', 'job'),
        'task_id': task_id, 'version': _API_VERSION
    }
    return self._root.get(
        '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/counters' % keys,
        params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempts(self, job_id, task_id):
    keys = {
        'app_id': job_id.replace('job', 'application'), 'job_id': job_id,
        'task_id': task_id, 'version': _API_VERSION
    }
    return self._root.get(
        '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts' % keys,
        params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempt(self, job_id, task_id, attempt_id):
    # Callers may pass an application id here, hence the normalization to a job id.
    keys = {
        'app_id': job_id.replace('job', 'application'), 'job_id': job_id.replace('application', 'job'),
        'task_id': task_id, 'attempt_id': attempt_id, 'version': _API_VERSION
    }
    return self._root.get(
        '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s' % keys,
        params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def kill(self, job_id):
    app_id = job_id.replace('job', 'application')
    get_resource_manager(self.username).kill(app_id)  # We need to call the RM
class SolrApi(object):
  """
  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler

  Builds and executes `select`/`suggest` requests for a collection;
  filter queries are assembled inline inside `query`.
  """

  def __init__(self, solr_url, user, security_enabled=SECURITY_ENABLED.get()):
    self._url = solr_url
    self._user = user
    self._client = HttpClient(self._url, logger=LOG)
    self.security_enabled = security_enabled

    if self.security_enabled:
      self._client.set_kerberos_auth()

    self._root = resource.Resource(self._client)

  def _get_params(self):
    # Impersonate the end user; without Kerberos we additionally
    # authenticate as the default user via user.name.
    if self.security_enabled:
      return (('doAs', self._user),)
    return (('user.name', DEFAULT_USER), ('doAs', self._user),)

  def query(self, collection, query):
    """Run the query against `<collection>/select` and return the parsed JSON response."""
    solr_query = {}
    solr_query['collection'] = collection['name']

    if query.get('download'):
      solr_query['rows'] = 1000
      solr_query['start'] = 0
    else:
      solr_query['rows'] = int(collection['template']['rows'] or 10)
      solr_query['start'] = int(query['start'])

    # Hard caps on page size and offset.
    solr_query['rows'] = min(solr_query['rows'], 1000)
    solr_query['start'] = min(solr_query['start'], 10000)

    # NOTE(review): sub-queries are joined with a bare 'OR' (no spaces) — confirm intentional.
    q_template = '(%s)' if len(query['qs']) >= 2 else '%s'
    params = self._get_params() + (
        ('q', 'OR'.join([q_template % (q['q'] or EMPTY_QUERY.get()) for q in query['qs']])),
        ('wt', 'json'),
        ('rows', solr_query['rows']),
        ('start', solr_query['start']),
    )

    if any(collection['facets']):
      params += (
        ('facet', 'true'),
        ('facet.mincount', 0),
        ('facet.limit', 10),
      )

      for facet in collection['facets']:
        if facet['type'] == 'query':
          params += (('facet.query', '%s' % facet['field']),)
        elif facet['type'] == 'range':
          params += tuple([
             ('facet.range', '{!ex=%s}%s' % (facet['field'], facet['field'])),
             ('f.%s.facet.range.start' % facet['field'], facet['properties']['start']),
             ('f.%s.facet.range.end' % facet['field'], facet['properties']['end']),
             ('f.%s.facet.range.gap' % facet['field'], facet['properties']['gap']),
             ('f.%s.facet.mincount' % facet['field'], facet['properties']['mincount']),]
          )
        elif facet['type'] == 'field':
          params += (
            ('facet.field', '{!ex=%s}%s' % (facet['field'], facet['field'])),
            # One extra row is requested to detect "more" results.
            ('f.%s.facet.limit' % facet['field'], int(facet['properties'].get('limit', 10)) + 1),
            ('f.%s.facet.mincount' % facet['field'], int(facet['properties']['mincount'])),
          )

    for fq in query['fqs']:
      if fq['type'] == 'field':
        # This does not work if spaces in Solr:
        # params += (('fq', ' '.join([urllib.unquote(utf_quoter('{!tag=%s}{!field f=%s}%s' % (fq['field'], fq['field'], _filter))) for _filter in fq['filter']])),)
        f = []
        for _filter in fq['filter']:
          # Quote values containing spaces; otherwise use the {!field} parser.
          if _filter is not None and ' ' in _filter:
            f.append('%s:"%s"' % (fq['field'], _filter))
          else:
            f.append('{!field f=%s}%s' % (fq['field'], _filter))
        params += (('fq', urllib.unquote(utf_quoter('{!tag=%s}' % fq['field'] + ' '.join(f)))),)
      elif fq['type'] == 'range':
        params += (('fq', '{!tag=%s}' % fq['field'] + ' '.join([urllib.unquote(
                    utf_quoter('%s:[%s TO %s}' % (fq['field'], f['from'], f['to']))) for f in fq['properties']])),)

    if collection['template']['fieldsSelected'] and collection['template']['isGridLayout']:
      fields = collection['template']['fieldsSelected'] + [collection['idField']] if collection['idField'] else []
      params += (('fl', urllib.unquote(utf_quoter(','.join(fields)))),)
    else:
      params += (('fl', '*'),)

    params += (
      ('hl', 'true'),
      ('hl.fl', '*'),
      ('hl.snippets', 3)
    )

    if collection['template']['fieldsSelected']:
      fields = []
      for field in collection['template']['fieldsSelected']:
        # Py2 filter() returns a list; pick the matching attribute, if any.
        attribute_field = filter(lambda attribute: field == attribute['name'], collection['template']['fieldsAttributes'])
        if attribute_field:
          if attribute_field[0]['sort']['direction']:
            fields.append('%s %s' % (field, attribute_field[0]['sort']['direction']))
      if fields:
        params += (
          ('sort', ','.join(fields)),
        )

    response = self._root.get('%(collection)s/select' % solr_query, params)
    return self._get_json(response)

  def suggest(self, solr_query, hue_core):
    """Query the collection's suggest handler; always return a parsed dict."""
    try:
      params = self._get_params() + (
          ('q', solr_query['q']),
          ('wt', 'json'),
      )
      response = self._root.get('%(collection)s/suggest' % solr_query, params)
      # The response may come back as a raw JSON string depending on transport.
      if type(response) != dict:
        response = json.loads(response)
      return response
    except RestException, e:
      raise PopupException(e, title=_('Error while accessing Solr'))
class SolrApi(object):
  """
  Read-side Solr client used by the dashboard/search app: builds the
  parameter tuples for /select and /suggest requests from a dashboard
  `collection` definition and a user `query` dict.

  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """
  def __init__(self, solr_url, user, security_enabled=SECURITY_ENABLED.get(), ssl_cert_ca_verify=SSL_CERT_CA_VERIFY.get()):
    self._url = solr_url
    self._user = user
    self._client = HttpClient(self._url, logger=LOG)
    self.security_enabled = security_enabled

    if self.security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

    self._root = resource.Resource(self._client)

    # The Kerberos handshake requires two requests in order to authenticate,
    # but if our first request is a PUT/POST, it might flat-out reject the
    # first request if the body is too large. So, connect here in order to get
    # a cookie so future PUT/POSTs will be pre-authenticated.
    if self.security_enabled:
      self._root.invoke('HEAD', '/')

  def _get_params(self):
    """Base auth/impersonation params prepended to every request."""
    if self.security_enabled:
      # Kerberos principal authenticates; only impersonation is needed.
      return (('doAs', self._user), )
    return (
        ('user.name', SERVER_USER.get()),
        ('doAs', self._user),
    )

  def _get_q(self, query):
    """Join the dashboard sub-queries into a single Solr q string."""
    # Parenthesize each sub-query only when several are combined.
    q_template = '(%s)' if len(query['qs']) >= 2 else '%s'
    # NOTE(review): the join separator is 'OR' with no surrounding spaces —
    # verify this is intentional and renders valid Solr syntax.
    return 'OR'.join([
        q_template % (q['q'] or EMPTY_QUERY.get()) for q in query['qs']
    ]).encode('utf-8')

  def _get_aggregate_function(self, facet):
    """Return the Solr aggregate expression (e.g. 'sum(price)') for a facet."""
    props = {
        'field': facet['field'],
        'aggregate': facet['properties']['aggregate'] if 'properties' in facet else facet['aggregate']
    }
    # Solr has no 'median' function: expressed as the 50th percentile.
    if props['aggregate'] == 'median':
      return 'percentile(%(field)s,50)' % props
    else:
      return '%(aggregate)s(%(field)s)' % props

  def _get_range_borders(self, collection, query):
    """
    Compute the active time-filter window (field, from, to, gap) for the
    collection, or an empty dict when no time filter applies.
    """
    props = {}
    # Per rolling-window size: bucket gap ('coeff' + 'unit') per widget type,
    # tuned for roughly 100 slots on chart widgets and ~10 on facet lists.
    GAPS = {
        '5MINUTES': {
            'histogram-widget': {'coeff': '+3', 'unit': 'SECONDS'},  # ~100 slots
            'bucket-widget': {'coeff': '+3', 'unit': 'SECONDS'},  # ~100 slots
            'bar-widget': {'coeff': '+3', 'unit': 'SECONDS'},  # ~100 slots
            'facet-widget': {'coeff': '+1', 'unit': 'MINUTES'},  # ~10 slots
        },
        '30MINUTES': {
            'histogram-widget': {'coeff': '+20', 'unit': 'SECONDS'},
            'bucket-widget': {'coeff': '+20', 'unit': 'SECONDS'},
            'bar-widget': {'coeff': '+20', 'unit': 'SECONDS'},
            'facet-widget': {'coeff': '+5', 'unit': 'MINUTES'},
        },
        '1HOURS': {
            'histogram-widget': {'coeff': '+30', 'unit': 'SECONDS'},
            'bucket-widget': {'coeff': '+30', 'unit': 'SECONDS'},
            'bar-widget': {'coeff': '+30', 'unit': 'SECONDS'},
            'facet-widget': {'coeff': '+10', 'unit': 'MINUTES'},
        },
        '12HOURS': {
            'histogram-widget': {'coeff': '+7', 'unit': 'MINUTES'},
            'bucket-widget': {'coeff': '+7', 'unit': 'MINUTES'},
            'bar-widget': {'coeff': '+7', 'unit': 'MINUTES'},
            'facet-widget': {'coeff': '+1', 'unit': 'HOURS'},
        },
        '1DAYS': {
            'histogram-widget': {'coeff': '+15', 'unit': 'MINUTES'},
            'bucket-widget': {'coeff': '+15', 'unit': 'MINUTES'},
            'bar-widget': {'coeff': '+15', 'unit': 'MINUTES'},
            'facet-widget': {'coeff': '+3', 'unit': 'HOURS'},
        },
        '2DAYS': {
            'histogram-widget': {'coeff': '+30', 'unit': 'MINUTES'},
            'bucket-widget': {'coeff': '+30', 'unit': 'MINUTES'},
            'bar-widget': {'coeff': '+30', 'unit': 'MINUTES'},
            'facet-widget': {'coeff': '+6', 'unit': 'HOURS'},
        },
        '7DAYS': {
            'histogram-widget': {'coeff': '+3', 'unit': 'HOURS'},
            'bucket-widget': {'coeff': '+3', 'unit': 'HOURS'},
            'bar-widget': {'coeff': '+3', 'unit': 'HOURS'},
            'facet-widget': {'coeff': '+1', 'unit': 'DAYS'},
        },
        '1MONTHS': {
            'histogram-widget': {'coeff': '+12', 'unit': 'HOURS'},
            'bucket-widget': {'coeff': '+12', 'unit': 'HOURS'},
            'bar-widget': {'coeff': '+12', 'unit': 'HOURS'},
            'facet-widget': {'coeff': '+5', 'unit': 'DAYS'},
        },
        '3MONTHS': {
            'histogram-widget': {'coeff': '+1', 'unit': 'DAYS'},
            'bucket-widget': {'coeff': '+1', 'unit': 'DAYS'},
            'bar-widget': {'coeff': '+1', 'unit': 'DAYS'},
            'facet-widget': {'coeff': '+30', 'unit': 'DAYS'},
        },
        '1YEARS': {
            'histogram-widget': {'coeff': '+3', 'unit': 'DAYS'},
            'bucket-widget': {'coeff': '+3', 'unit': 'DAYS'},
            'bar-widget': {'coeff': '+3', 'unit': 'DAYS'},
            'facet-widget': {'coeff': '+12', 'unit': 'MONTHS'},
        },
        '2YEARS': {
            'histogram-widget': {'coeff': '+7', 'unit': 'DAYS'},
            'bucket-widget': {'coeff': '+7', 'unit': 'DAYS'},
            'bar-widget': {'coeff': '+7', 'unit': 'DAYS'},
            'facet-widget': {'coeff': '+3', 'unit': 'MONTHS'},
        },
        '10YEARS': {
            'histogram-widget': {'coeff': '+1', 'unit': 'MONTHS'},
            'bucket-widget': {'coeff': '+1', 'unit': 'MONTHS'},
            'bar-widget': {'coeff': '+1', 'unit': 'MONTHS'},
            'facet-widget': {'coeff': '+1', 'unit': 'YEARS'},
        }
    }

    time_field = collection['timeFilter'].get('field')

    if time_field and (collection['timeFilter']['value'] != 'all' or collection['timeFilter']['type'] == 'fixed'):
      # fqs overrides main time filter
      fq_time_ids = [fq['id'] for fq in query['fqs'] if fq['field'] == time_field]
      props['time_filter_overrides'] = fq_time_ids
      props['time_field'] = time_field

      if collection['timeFilter']['type'] == 'rolling':
        # Rolling window: relative to NOW, gap table keyed by window size.
        props['field'] = collection['timeFilter']['field']
        props['from'] = 'NOW-%s' % collection['timeFilter']['value']
        props['to'] = 'NOW'
        props['gap'] = GAPS.get(collection['timeFilter']['value'])
      elif collection['timeFilter']['type'] == 'fixed':
        # Fixed window: absolute bounds supplied by the dashboard.
        props['field'] = collection['timeFilter']['field']
        props['from'] = collection['timeFilter']['from']
        props['to'] = collection['timeFilter']['to']
        props['fixed'] = True

    return props

  def _get_time_filter_query(self, timeFilter, facet):
    """Translate a time filter into Solr range-facet start/end/gap values."""
    if 'fixed' in timeFilter:
      # Fixed bounds: gap is computed by the shared range-facet helper
      # (defined elsewhere in this module).
      props = {}
      stat_facet = {'min': timeFilter['from'], 'max': timeFilter['to']}
      _compute_range_facet(facet['widgetType'], stat_facet, props, stat_facet['min'], stat_facet['max'])
      gap = props['gap']
      unit = re.split('\d+', gap)[1]
      return {
          'start': '%(from)s/%(unit)s' % {'from': timeFilter['from'], 'unit': unit},
          'end': '%(to)s/%(unit)s' % {'to': timeFilter['to'], 'unit': unit},
          'gap': '%(gap)s' % props,  # add a 'auto'
      }
    else:
      # Rolling bounds: gap comes straight from the per-widget GAPS table.
      gap = timeFilter['gap'][facet['widgetType']]
      return {
          'start': '%(from)s/%(unit)s' % {'from': timeFilter['from'], 'unit': gap['unit']},
          'end': '%(to)s/%(unit)s' % {'to': timeFilter['to'], 'unit': gap['unit']},
          'gap': '%(coeff)s%(unit)s/%(unit)s' % gap,  # add a 'auto'
      }

  def _get_fq(self, collection, query):
    """Build the tuple of ('fq', ...) filter-query params for the request."""
    params = ()
    timeFilter = {}

    if collection:
      timeFilter = self._get_range_borders(collection, query)
    # Main time filter applies only when no fq already covers the time field.
    if timeFilter and not timeFilter.get('time_filter_overrides'):
      params += (('fq', urllib.unquote(utf_quoter('%(field)s:[%(from)s TO %(to)s]' % timeFilter))), )

    # Merge facets queries on same fields
    grouped_fqs = groupby(query['fqs'], lambda x: (x['type'], x['field']))
    merged_fqs = []
    for key, group in grouped_fqs:
      field_fq = next(group)
      for fq in group:
        for f in fq['filter']:
          field_fq['filter'].append(f)
      merged_fqs.append(field_fq)

    for fq in merged_fqs:
      if fq['type'] == 'field':
        fields = fq['field'] if type(fq['field']) == list else [fq['field']]  # 2D facets support
        for field in fields:
          f = []
          for _filter in fq['filter']:
            values = _filter['value'] if type(_filter['value']) == list else [_filter['value']]  # 2D facets support
            if fields.index(field) < len(values):  # Lowest common field denominator
              value = values[fields.index(field)]
              exclude = '-' if _filter['exclude'] else ''
              if value is not None and ' ' in force_unicode(value):
                # Values with spaces need quoting (with inner quotes escaped).
                value = force_unicode(value).replace('"', '\\"')
                f.append('%s%s:"%s"' % (exclude, field, value))
              else:
                f.append('%s{!field f=%s}%s' % (exclude, field, value))
          _params = '{!tag=%(id)s}' % fq + ' '.join(f)
          params += (('fq', urllib.unquote(utf_quoter(_params))), )
      elif fq['type'] == 'range':
        params += (('fq', '{!tag=%(id)s}' % fq + ' '.join([urllib.unquote(
                    utf_quoter('%s%s:[%s TO %s}' % ('-' if field['exclude'] else '', fq['field'], f['from'], f['to'])))
                    for field, f in zip(fq['filter'], fq['properties'])])), )
      elif fq['type'] == 'range-up':
        # Open-ended range: above 'from' when is_up, below it otherwise.
        params += (('fq', '{!tag=%(id)s}' % fq + ' '.join([urllib.unquote(
                    utf_quoter('%s%s:[%s TO %s}' % ('-' if field['exclude'] else '', fq['field'],
                                                    f['from'] if fq['is_up'] else '*',
                                                    '*' if fq['is_up'] else f['from'])))
                    for field, f in zip(fq['filter'], fq['properties'])])), )
      elif fq['type'] == 'map':
        # Bounding-box filter: lat/lon between the SW and NE corners.
        _keys = fq.copy()
        _keys.update(fq['properties'])
        params += (('fq', '{!tag=%(id)s}' % fq + urllib.unquote(
                    utf_quoter('%(lat)s:[%(lat_sw)s TO %(lat_ne)s} AND %(lon)s:[%(lon_sw)s TO %(lon_ne)s}' % _keys))), )

    return params

  def query(self, collection, query):
    """
    Run a dashboard search: assembles q, facets, fqs, field list,
    highlighting and sort, issues /select and returns the decoded JSON.
    """
    solr_query = {}
    solr_query['collection'] = collection['name']

    if query.get('download'):
      solr_query['rows'] = 1000
      solr_query['start'] = 0
    else:
      solr_query['rows'] = int(collection['template']['rows'] or 10)
      solr_query['start'] = int(query['start'])

    # Hard caps on page size and offset.
    solr_query['rows'] = min(solr_query['rows'], 1000)
    solr_query['start'] = min(solr_query['start'], 10000)

    params = self._get_params() + (
        ('q', self._get_q(query)),
        ('wt', 'json'),
        ('rows', solr_query['rows']),
        ('start', solr_query['start']),
    )

    if any(collection['facets']):
      params += (
          ('facet', 'true'),
          ('facet.mincount', 0),
          ('facet.limit', 10),
      )
      json_facets = {}

      timeFilter = self._get_range_borders(collection, query)

      for facet in collection['facets']:
        if facet['type'] == 'query':
          params += (('facet.query', '%s' % facet['field']), )
        elif facet['type'] == 'range' or facet['type'] == 'range-up':
          keys = {
              'id': '%(id)s' % facet,
              'field': facet['field'],
              'key': '%(field)s-%(id)s' % facet,
              'start': facet['properties']['start'],
              'end': facet['properties']['end'],
              'gap': facet['properties']['gap'],
              'mincount': int(facet['properties']['mincount'])
          }

          # Apply the global time window unless this facet's own fq
          # overrides it (histogram widgets keep their own zoom).
          if timeFilter and timeFilter['time_field'] == facet['field'] and (facet['id'] not in timeFilter['time_filter_overrides'] or facet['widgetType'] != 'histogram-widget'):
            keys.update(self._get_time_filter_query(timeFilter, facet))

          params += (('facet.range', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.range.start=%(start)s f.%(field)s.facet.range.end=%(end)s f.%(field)s.facet.range.gap=%(gap)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys), )
        elif facet['type'] == 'field':
          keys = {
              'id': '%(id)s' % facet,
              'field': facet['field'],
              'key': '%(field)s-%(id)s' % facet,
              # One extra row so the widget can tell whether more values exist.
              'limit': int(facet['properties'].get('limit', 10)) + (1 if facet['widgetType'] == 'facet-widget' else 0),
              'mincount': int(facet['properties']['mincount'])
          }

          params += (('facet.field', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys), )
        elif facet['type'] == 'nested':
          # Nested facets go through the JSON Facet API (json.facet param).
          _f = {
              'field': facet['field'],
              'limit': int(facet['properties'].get('limit', 10)) + (1 if facet['widgetType'] == 'facet-widget' else 0),
              'mincount': int(facet['properties']['mincount'])
          }

          if 'start' in facet['properties']:
            _f.update({
                'type': 'range',
                'start': facet['properties']['start'],
                'end': facet['properties']['end'],
                'gap': facet['properties']['gap'],
            })
            if timeFilter and timeFilter['time_field'] == facet['field'] and (facet['id'] not in timeFilter['time_filter_overrides'] or facet['widgetType'] != 'bucket-widget'):
              _f.update(self._get_time_filter_query(timeFilter, facet))
          else:
            _f.update({
                'type': 'terms',
                'field': facet['field'],
                'excludeTags': facet['id']
            })

          if facet['properties']['facets']:
            # Second dimension: either a terms sub-facet ('count') or an
            # aggregate function, keyed 'd2'.
            if facet['properties']['facets'][0]['aggregate'] == 'count':
              _f['facet'] = {
                  'd2': {
                      'type': 'terms',
                      'field': '%(field)s' % facet['properties']['facets'][0],
                      'limit': int(facet['properties']['facets'][0].get('limit', 10)),
                      'mincount': int(facet['properties']['facets'][0]['mincount'])
                  }
              }
              if len(facet['properties']['facets']) > 1:  # Get 3rd dimension calculation
                _f['facet']['d2']['facet'] = {
                    'd2': self._get_aggregate_function(facet['properties']['facets'][1])
                }
            else:
              _f['facet'] = {
                  'd2': self._get_aggregate_function(facet['properties']['facets'][0])
              }

          json_facets[facet['id']] = _f
        elif facet['type'] == 'function':
          json_facets[facet['id']] = self._get_aggregate_function(facet)
          json_facets['processEmpty'] = True
        elif facet['type'] == 'pivot':
          if facet['properties']['facets'] or facet['widgetType'] == 'map-widget':
            fields = facet['field']
            fields_limits = []
            for f in facet['properties']['facets']:
              fields_limits.append('f.%s.facet.limit=%s' % (f['field'], f['limit']))
              fields_limits.append('f.%s.facet.mincount=%s' % (f['field'], f['mincount']))
              fields += ',' + f['field']
            keys = {
                'id': '%(id)s' % facet,
                'key': '%(field)s-%(id)s' % facet,
                'field': facet['field'],
                'fields': fields,
                'limit': int(facet['properties'].get('limit', 10)),
                'mincount': int(facet['properties']['mincount']),
                'fields_limits': ' '.join(fields_limits)
            }

            params += (('facet.pivot', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s %(fields_limits)s}%(fields)s' % keys), )

      if json_facets:
        params += (('json.facet', json.dumps(json_facets)), )

    params += self._get_fq(collection, query)

    if collection['template']['fieldsSelected'] and collection['template']['isGridLayout']:
      fields = set(collection['template']['fieldsSelected'] + [collection['idField']] if collection['idField'] else [])
      # Add field if needed
      if collection['template']['leafletmap'].get('latitudeField'):
        fields.add(collection['template']['leafletmap']['latitudeField'])
      if collection['template']['leafletmap'].get('longitudeField'):
        fields.add(collection['template']['leafletmap']['longitudeField'])
      if collection['template']['leafletmap'].get('labelField'):
        fields.add(collection['template']['leafletmap']['labelField'])
      params += (('fl', urllib.unquote(utf_quoter(','.join(list(fields))))), )
    else:
      params += (('fl', '*'), )

    params += (
        ('hl', 'true'),
        ('hl.fl', '*'),
        ('hl.snippets', 5),
        ('hl.fragsize', 1000),
    )

    if collection['template']['fieldsSelected']:
      fields = []
      for field in collection['template']['fieldsSelected']:
        # NOTE(review): Python 2 semantics — `filter` returns a list here.
        attribute_field = filter(lambda attribute: field == attribute['name'], collection['template']['fieldsAttributes'])
        if attribute_field:
          if attribute_field[0]['sort']['direction']:
            fields.append('%s %s' % (field, attribute_field[0]['sort']['direction']))
      if fields:
        params += (('sort', ','.join(fields)), )

    response = self._root.get('%(collection)s/select' % solr_query, params)

    return self._get_json(response)

  def suggest(self, collection, query):
    """Hit the /suggest handler for type-ahead completions."""
    try:
      params = self._get_params() + (
          ('suggest', 'true'),
          ('suggest.build', 'true'),
          ('suggest.q', query['q']),
          ('wt', 'json'),
      )
      if query.get('dictionary'):
        params += (('suggest.dictionary', query['dictionary']), )
      response = self._root.get('%s/suggest' % collection, params)
      return self._get_json(response)
    except RestException, e:
      raise PopupException(e, title=_('Error while accessing Solr'))
class SolrApi(object):
  """
  Read-side Solr client (JSON Facet API variant) used by the dashboard app:
  builds /select and /suggest requests from a dashboard `collection`
  definition and a user `query` dict.

  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """
  def __init__(self, solr_url, user, security_enabled=SECURITY_ENABLED.get() if search_enabled() else SECURITY_ENABLED.default, ssl_cert_ca_verify=SSL_CERT_CA_VERIFY.get()):
    self._url = solr_url
    self._user = user
    self._client = HttpClient(self._url, logger=LOG)
    self.security_enabled = security_enabled

    if self.security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

    self._root = resource.Resource(self._client)

    # The Kerberos handshake requires two requests in order to authenticate,
    # but if our first request is a PUT/POST, it might flat-out reject the
    # first request if the body is too large. So, connect here in order to get
    # a cookie so future PUT/POSTs will be pre-authenticated.
    if self.security_enabled:
      self._root.invoke('HEAD', '/')

  def query(self, collection, query):
    """
    Run a dashboard search: assembles q, facets (classic and JSON Facet API),
    fqs, field list, highlighting and sort, issues /select and returns the
    decoded JSON response.
    """
    solr_query = {}
    solr_query['collection'] = collection['name']

    if query.get('download'):
      solr_query['rows'] = 1000
      solr_query['start'] = 0
    else:
      solr_query['rows'] = int(collection['template']['rows'] or 10)
      solr_query['start'] = int(query['start'])

    # Hard caps on page size and offset.
    solr_query['rows'] = min(solr_query['rows'], 1000)
    solr_query['start'] = min(solr_query['start'], 10000)

    params = self._get_params() + (
        ('q', self._get_q(query)),
        ('wt', 'json'),
        ('rows', solr_query['rows']),
        ('start', solr_query['start']),
    )

    if any(collection['facets']):
      params += (
          ('facet', 'true'),
          ('facet.mincount', 0),
          ('facet.limit', 10),
      )
      json_facets = {}

      timeFilter = self._get_range_borders(collection, query)

      for facet in collection['facets']:
        if facet['type'] == 'query':
          params += (('facet.query', '%s' % facet['field']), )
        elif facet['type'] == 'range' or facet['type'] == 'range-up':
          keys = {
              'id': '%(id)s' % facet,
              'field': facet['field'],
              'key': '%(field)s-%(id)s' % facet,
              'start': facet['properties']['start'],
              'end': facet['properties']['end'],
              'gap': facet['properties']['gap'],
              'mincount': int(facet['properties']['mincount'])
          }

          # Apply the global time window unless this facet's own fq
          # overrides it (histogram widgets keep their own zoom).
          if timeFilter and timeFilter['time_field'] == facet['field'] and (facet['id'] not in timeFilter['time_filter_overrides'] or facet['widgetType'] != 'histogram-widget'):
            keys.update(self._get_time_filter_query(timeFilter, facet))

          params += (('facet.range', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.range.start=%(start)s f.%(field)s.facet.range.end=%(end)s f.%(field)s.facet.range.gap=%(gap)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys), )
        elif facet['type'] == 'field':
          keys = {
              'id': '%(id)s' % facet,
              'field': facet['field'],
              'key': '%(field)s-%(id)s' % facet,
              # One extra row so the widget can tell whether more values exist.
              'limit': int(facet['properties'].get('limit', 10)) + (1 if facet['widgetType'] == 'facet-widget' else 0),
              'mincount': int(facet['properties']['mincount'])
          }

          params += (('facet.field', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys), )
        elif facet['type'] == 'nested':
          # Nested facets go through the JSON Facet API (json.facet param).
          _f = {
              'field': facet['field'],
              'limit': int(facet['properties'].get('limit', 10)) + (1 if facet['widgetType'] == 'text-facet-widget' else 0),
              'mincount': int(facet['properties']['mincount']),
              'sort': {'count': facet['properties']['sort']},
          }
          # Fixed: removed leftover debug statement `print facet`.

          # Block-join domain switch for parent/child documents.
          if facet['properties']['domain'].get('blockParent') or facet['properties']['domain'].get('blockChildren'):
            _f['domain'] = {}
            if facet['properties']['domain'].get('blockParent'):
              _f['domain']['blockParent'] = ' OR '.join(facet['properties']['domain']['blockParent'])
            if facet['properties']['domain'].get('blockChildren'):
              _f['domain']['blockChildren'] = ' OR '.join(facet['properties']['domain']['blockChildren'])

          if 'start' in facet['properties'] and not facet['properties'].get('type') == 'field':
            _f.update({
                'type': 'range',
                'start': facet['properties']['start'],
                'end': facet['properties']['end'],
                'gap': facet['properties']['gap'],
            })
            if timeFilter and timeFilter['time_field'] == facet['field'] and (facet['id'] not in timeFilter['time_filter_overrides'] or facet['widgetType'] != 'bucket-widget'):
              _f.update(self._get_time_filter_query(timeFilter, facet))
          else:
            _f.update({
                'type': 'terms',
                'field': facet['field'],
                'excludeTags': facet['id'],
                'offset': 0,
                'numBuckets': True,
                'allBuckets': True,
                'prefix': ''
            })
            if facet['properties']['canRange'] and not facet['properties']['isDate']:
              del _f['mincount']  # Numeric fields do not support

          if facet['properties']['facets']:
            self._n_facet_dimension(facet, _f, facet['properties']['facets'], 1)
            if facet['widgetType'] == 'text-facet-widget':
              # Fixed: keys()[0] is Python-2-only; wrap in list() so it also
              # works on Python 3 dict views.
              _fname = list(_f['facet'].keys())[0]
              _f['sort'] = {_fname: facet['properties']['sort']}
              # domain = '-d2:NaN' # Solr 6.4

          json_facets[facet['id']] = _f
        elif facet['type'] == 'function':
          json_facets[facet['id']] = self._get_aggregate_function(facet)
          json_facets['processEmpty'] = True
        elif facet['type'] == 'pivot':
          if facet['properties']['facets'] or facet['widgetType'] == 'map-widget':
            fields = facet['field']
            fields_limits = []
            for f in facet['properties']['facets']:
              fields_limits.append('f.%s.facet.limit=%s' % (f['field'], f['limit']))
              fields_limits.append('f.%s.facet.mincount=%s' % (f['field'], f['mincount']))
              fields += ',' + f['field']
            keys = {
                'id': '%(id)s' % facet,
                'key': '%(field)s-%(id)s' % facet,
                'field': facet['field'],
                'fields': fields,
                'limit': int(facet['properties'].get('limit', 10)),
                'mincount': int(facet['properties']['mincount']),
                'fields_limits': ' '.join(fields_limits)
            }

            params += (('facet.pivot', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s %(fields_limits)s}%(fields)s' % keys), )

      if json_facets:
        params += (('json.facet', json.dumps(json_facets)), )

    params += self._get_fq(collection, query)

    if collection['template']['fieldsSelected'] and collection['template']['isGridLayout']:
      fields = set(collection['template']['fieldsSelected'] + [collection['idField']] if collection['idField'] else [])
      # Add field if needed
      if collection['template']['leafletmap'].get('latitudeField'):
        fields.add(collection['template']['leafletmap']['latitudeField'])
      if collection['template']['leafletmap'].get('longitudeField'):
        fields.add(collection['template']['leafletmap']['longitudeField'])
      if collection['template']['leafletmap'].get('labelField'):
        fields.add(collection['template']['leafletmap']['labelField'])
      fl = urllib.unquote(utf_quoter(','.join(list(fields))))
    else:
      fl = '*'

    # Pull child documents alongside their parents when nested fields exist.
    nested_fields = self._get_nested_fields(collection)
    if nested_fields:
      fl += urllib.unquote(utf_quoter(',[child parentFilter="%s"]' % ' OR '.join(nested_fields)))

    params += (('fl', fl), )

    params += (
        ('hl', 'true'),
        ('hl.fl', '*'),
        ('hl.snippets', 5),
        ('hl.fragsize', 1000),
    )

    if collection['template']['fieldsSelected']:
      fields = []
      for field in collection['template']['fieldsSelected']:
        # Fixed: list comprehension instead of bare filter() — on Python 3 a
        # filter object is always truthy and not indexable.
        attribute_field = [attribute for attribute in collection['template']['fieldsAttributes'] if field == attribute['name']]
        if attribute_field:
          if attribute_field[0]['sort']['direction']:
            fields.append('%s %s' % (field, attribute_field[0]['sort']['direction']))
      if fields:
        params += (('sort', ','.join(fields)), )

    response = self._root.get('%(collection)s/select' % solr_query, params)

    return self._get_json(response)

  def _n_facet_dimension(self, widget, _f, facets, dim):
    """
    Recursively build the json.facet sub-tree for dimension `dim`.

    Count facets become 'dim_NN:field' terms buckets; aggregate facets become
    'agg_NN_MM:fn(field)' metrics. Mutates `_f` in place.
    """
    facet = facets[0]
    f_name = 'dim_%02d:%s' % (dim, facet['field'])

    if facet['aggregate']['function'] == 'count':
      if 'facet' not in _f:
        _f['facet'] = {f_name: {}}
      else:
        _f['facet'][f_name] = {}
      _f = _f['facet']

      _f[f_name] = {
          'type': 'terms',
          'field': '%(field)s' % facet,
          'limit': int(facet.get('limit', 10)),
          'mincount': int(facet['mincount']),
          'numBuckets': True,
          'allBuckets': True,
          'prefix': ''
      }
      # Tree widgets need the leaf metric duplicated at every level.
      if widget['widgetType'] == 'tree2-widget' and facets[-1]['aggregate']['function'] != 'count':
        _f['subcount'] = self._get_aggregate_function(facets[-1])

      if len(facets) > 1:  # Get n+1 dimension
        if facets[1]['aggregate']['function'] == 'count':
          self._n_facet_dimension(widget, _f[f_name], facets[1:], dim + 1)
        else:
          self._n_facet_dimension(widget, _f[f_name], facets[1:], dim)
    else:
      # Aggregate leaf: emit metrics until the next count dimension.
      agg_function = self._get_aggregate_function(facet)
      _f['facet'] = {
          'agg_%02d_00:%s' % (dim, agg_function): agg_function
      }
      for i, _f_agg in enumerate(facets[1:], 1):
        if _f_agg['aggregate']['function'] != 'count':
          agg_function = self._get_aggregate_function(_f_agg)
          _f['facet']['agg_%02d_%02d:%s' % (dim, i, agg_function)] = agg_function
        else:
          self._n_facet_dimension(widget, _f, facets[i:], dim + 1)  # Get n+1 dimension
          break

  def suggest(self, collection, query):
    """Hit the /suggest handler for type-ahead completions."""
    try:
      params = self._get_params() + (
          ('suggest', 'true'),
          ('suggest.build', 'true'),
          ('suggest.q', query['q']),
          ('wt', 'json'),
      )
      if query.get('dictionary'):
        params += (('suggest.dictionary', query['dictionary']), )
      response = self._root.get('%s/suggest' % collection, params)
      return self._get_json(response)
    # Fixed: `except X as e` (valid on Python 2.6+ and Python 3) instead of
    # the Python-2-only `except X, e` form.
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))
class HistoryServerApi(object):
  """
  Thin REST wrapper around the MapReduce JobHistory server
  ('ws/<version>/history' endpoints). The acting user is kept in
  thread-local storage so one shared instance can serve many requests.
  """

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(oozie_url, 'ws/%s/history' % _API_VERSION)
    self._security_enabled = security_enabled
    self._thread_local = threading.local()  # Per-thread acting user

    self._client = HttpClient(self._url, logger=LOG)
    if self._security_enabled:
      self._client.set_kerberos_auth()
    self._client.set_verify(ssl_cert_ca_verify)

    self._root = Resource(self._client)

  def __str__(self):
    return "HistoryServerApi at %s" % self._url

  def _get_params(self):
    """Impersonation params: add doAs (and user.name when not Kerberized)."""
    params = {}
    acting_user = self.username
    if acting_user != DEFAULT_USER.get():  # We impersonate if needed
      params['doAs'] = acting_user
      if not self._security_enabled:
        params['user.name'] = DEFAULT_USER.get()
    return params

  def _get(self, path_template, **path_args):
    """Issue a JSON GET against `path_template` filled with `path_args`."""
    return self._root.get(path_template % path_args, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  @property
  def url(self):
    return self._url

  @property
  def user(self):
    # Backward compatibility alias for `username`.
    return self.username

  @property
  def username(self):
    # Falls back to the configured default until setuser() has been called
    # on this thread.
    try:
      return self._thread_local.user
    except AttributeError:
      return DEFAULT_USER.get()

  def setuser(self, user):
    """Set the acting user for this thread; returns the previous one."""
    previous = self.user
    self._thread_local.user = user
    return previous

  def job(self, user, job_id):
    return self._get('mapreduce/jobs/%(job_id)s', job_id=job_id)

  def counters(self, job_id):
    return self._get('mapreduce/jobs/%(job_id)s/counters', job_id=job_id)

  def conf(self, job_id):
    return self._get('mapreduce/jobs/%(job_id)s/conf', job_id=job_id)

  def job_attempts(self, job_id):
    return self._get('mapreduce/jobs/%(job_id)s/jobattempts', job_id=job_id)

  def tasks(self, job_id):
    return self._get('mapreduce/jobs/%(job_id)s/tasks', job_id=job_id)

  def task(self, job_id, task_id):
    return self._get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s', job_id=job_id, task_id=task_id)

  def task_attempts(self, job_id, task_id):
    return self._get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts', job_id=job_id, task_id=task_id)

  def task_counters(self, job_id, task_id):
    # History server only knows job ids, not application ids.
    job_id = job_id.replace('application', 'job')
    return self._get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/counters', job_id=job_id, task_id=task_id)

  def task_attempt(self, job_id, task_id, attempt_id):
    return self._get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s', job_id=job_id, task_id=task_id, attempt_id=attempt_id)

  def task_attempt_counters(self, job_id, task_id, attempt_id):
    return self._get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s/counters', job_id=job_id, task_id=task_id, attempt_id=attempt_id)
class SolrApi(object):
  """
  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """
  def __init__(self, solr_url=None, user=None, security_enabled=False, ssl_cert_ca_verify=SSL_CERT_CA_VERIFY.get()):
    # Fall back to the configured Solr URL when none is given explicitly.
    if solr_url is None and hasattr(SOLR_URL, 'get'):
      solr_url = SOLR_URL.get()

    # NOTE(review): when solr_url ends up falsy, no client attributes are set
    # at all and any method call will fail with AttributeError — confirm
    # callers guard against this.
    if solr_url:
      self._url = solr_url
      self._user = user
      self._client = HttpClient(self._url, logger=LOG)

      self.security_enabled = security_enabled or SECURITY_ENABLED.get()

      if self.security_enabled:
        self._client.set_kerberos_auth()

      self._client.set_verify(ssl_cert_ca_verify)

      self._root = resource.Resource(self._client)

      # The Kerberos handshake requires two requests in order to authenticate,
      # but if our first request is a PUT/POST, it might flat-out reject the
      # first request if the body is too large. So, connect here in order to get
      # a cookie so future PUT/POSTs will be pre-authenticated.
      if self.security_enabled:
        self._root.invoke('HEAD', '/')

  def query(self, collection, query):
    """
    Build and run a Solr /select query for a dashboard collection.

    Translates the dashboard's facet definitions (query, range, field,
    nested, function, pivot) into classic facet params and/or a JSON Facet
    API payload, adds filter queries, field list, more-like-this,
    highlighting and sorting, then returns the parsed JSON response.
    `params` is accumulated as a tuple of (name, value) pairs so repeated
    parameter names are preserved.
    """
    solr_query = {}

    solr_query['collection'] = collection['name']

    # Downloads page through a fixed window; interactive queries honor the
    # template's row count and the requested offset.
    if query.get('download'):
      solr_query['rows'] = 1000
      solr_query['start'] = 0
    else:
      solr_query['rows'] = int(collection['template']['rows'] or 10)
      solr_query['start'] = int(query['start'])

    # Hard caps on page size and offset.
    solr_query['rows'] = min(solr_query['rows'], 1000)
    solr_query['start'] = min(solr_query['start'], 10000)

    params = self._get_params() + (
        ('q', self._get_q(query)),
        ('wt', 'json'),
        ('rows', solr_query['rows']),
        ('start', solr_query['start']),
    )

    if any(collection['facets']):
      params += (
        ('facet', 'true'),
        ('facet.mincount', 0),
        ('facet.limit', 10),
      )

      # Facets expressed through the JSON Facet API are collected here and
      # serialized once at the end.
      json_facets = {}

      timeFilter = self._get_range_borders(collection, query)

      for facet in collection['facets']:
        if facet['type'] == 'query':
          params += (('facet.query', '%s' % facet['field']),)
        elif facet['type'] == 'range' or facet['type'] == 'range-up':
          keys = {
              'id': '%(id)s' % facet,
              'field': facet['field'],
              'key': '%(field)s-%(id)s' % facet,
              'start': facet['properties']['start'],
              'end': facet['properties']['end'],
              'gap': facet['properties']['gap'],
              'mincount': int(facet['properties']['mincount'])
          }

          # Let the global time filter override this facet's range unless the
          # facet explicitly opted out (histogram widgets keep their own range).
          if timeFilter and timeFilter['time_field'] == facet['field'] and (facet['id'] not in timeFilter['time_filter_overrides'] or facet['widgetType'] != 'histogram-widget'):
            keys.update(self._get_time_filter_query(timeFilter, facet))

          params += (
             ('facet.range', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.range.start=%(start)s f.%(field)s.facet.range.end=%(end)s f.%(field)s.facet.range.gap=%(gap)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys),
          )
        elif facet['type'] == 'field':
          keys = {
              'id': '%(id)s' % facet,
              'field': facet['field'],
              'key': '%(field)s-%(id)s' % facet,
              # facet-widget asks for one extra bucket, presumably to detect
              # whether more values exist — TODO confirm.
              'limit': int(facet['properties'].get('limit', 10)) + (1 if facet['widgetType'] == 'facet-widget' else 0),
              'mincount': int(facet['properties']['mincount'])
          }

          params += (
              ('facet.field', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys),
          )
        elif facet['type'] == 'nested':
          # Nested (multi-dimension) facets go through the JSON Facet API;
          # _n_facet_dimension builds the recursive structure into _f.
          _f = {}
          if facet['properties']['facets']:
            self._n_facet_dimension(facet, _f, facet['properties']['facets'], 1, timeFilter)

          if facet['properties'].get('domain'):
            if facet['properties']['domain'].get('blockParent') or facet['properties']['domain'].get('blockChildren'):
              _f['domain'] = {}
              if facet['properties']['domain'].get('blockParent'):
                _f['domain']['blockParent'] = ' OR '.join(facet['properties']['domain']['blockParent'])
              if facet['properties']['domain'].get('blockChildren'):
                _f['domain']['blockChildren'] = ' OR '.join(facet['properties']['domain']['blockChildren'])

          if _f:
            # Sort by count unless a sub-aggregate requests its own ordering;
            # the last non-default aggregate wins.
            sort = {'count': facet['properties']['facets'][0]['sort']}
            for i, agg in enumerate(self._get_dimension_aggregates(facet['properties']['facets'][1:])):
              if agg['sort'] != 'default':
                agg_function = self._get_aggregate_function(agg)
                sort = {'agg_%02d_%02d:%s' % (1, i, agg_function): agg['sort']}
            if sort.get('count') == 'default':
              sort['count'] = 'desc'

            # Attach bucket bookkeeping to the first dimension entry.
            dim_key = [key for key in _f['facet'].keys() if 'dim' in key][0]
            _f['facet'][dim_key].update({
                  'excludeTags': facet['id'],
                  'offset': 0,
                  'numBuckets': True,
                  'allBuckets': True,
                  'sort': sort
                  #'prefix': '' # Forbidden on numeric fields
              })

            json_facets[facet['id']] = _f['facet'][dim_key]
        elif facet['type'] == 'function':
          if facet['properties']['facets']:
            json_facets[facet['id']] = self._get_aggregate_function(facet['properties']['facets'][0])
            if facet['properties']['compare']['is_enabled']:
              # TODO: global compare override
              # Wrap the aggregate in a two-bucket range facet covering the
              # current and the previous period (e.g. gap '7DAYS' -> unit 'DAYS').
              unit = re.split('\d+', facet['properties']['compare']['gap'])[1]
              json_facets[facet['id']] = {
                'type': 'range',
                'field': collection['timeFilter'].get('field'),
                'start': 'NOW/%s-%s-%s' % (unit, facet['properties']['compare']['gap'], facet['properties']['compare']['gap']),
                'end': 'NOW/%s' % unit,
                'gap': '+%(gap)s' % facet['properties']['compare'],
                'facet': {facet['id']: json_facets[facet['id']]}
              }
            if facet['properties']['filter']['is_enabled']:
              # Wrap (possibly again) in a query facet restricting the domain.
              json_facets[facet['id']] = {
                'type': 'query',
                'q': facet['properties']['filter']['query'] or EMPTY_QUERY.get(),
                'facet': {facet['id']: json_facets[facet['id']]}
              }
            json_facets['processEmpty'] = True
        elif facet['type'] == 'pivot':
          if facet['properties']['facets'] or facet['widgetType'] == 'map-widget':
            # Chain the pivot fields into 'field1,field2,...' with per-field
            # limit/mincount local params.
            fields = facet['field']
            fields_limits = []
            for f in facet['properties']['facets']:
              fields_limits.append('f.%s.facet.limit=%s' % (f['field'], f['limit']))
              fields_limits.append('f.%s.facet.mincount=%s' % (f['field'], f['mincount']))
              fields += ',' + f['field']
            keys = {
                'id': '%(id)s' % facet,
                'key': '%(field)s-%(id)s' % facet,
                'field': facet['field'],
                'fields': fields,
                'limit': int(facet['properties'].get('limit', 10)),
                'mincount': int(facet['properties']['mincount']),
                'fields_limits': ' '.join(fields_limits)
            }

            params += (
                ('facet.pivot', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s %(fields_limits)s}%(fields)s' % keys),
            )

      if json_facets:
        params += (
            ('json.facet', json.dumps(json_facets)),
        )

    # Filter queries (fq) from the dashboard's active filters.
    params += self._get_fq(collection, query)

    fl = urllib.unquote(utf_quoter(','.join(Collection2.get_field_list(collection))))

    nested_fields = self._get_nested_fields(collection)
    if nested_fields:
      fl += urllib.unquote(utf_quoter(',[child parentFilter="%s"]' % ' OR '.join(nested_fields)))

    # NOTE(review): fl is a string here, so `fl != ['*']` is always True —
    # presumably the intent was `fl != '*'`; confirm before changing.
    if collection['template']['moreLikeThis'] and fl != ['*']: # Potential conflict with nested documents
      id_field = collection.get('idField', 'id')
      params += (
        ('mlt', 'true'),
        ('mlt.fl', fl.replace(',%s' % id_field, '')),
        ('mlt.mintf', 1),
        ('mlt.mindf', 1),
        ('mlt.maxdf', 50),
        ('mlt.maxntp', 1000),
        ('mlt.count', 10),
        #('mlt.minwl', 1),
        #('mlt.maxwl', 1),
      )
      fl = '*'

    params += (('fl', fl),)

    params += (
      ('hl', 'true'),
      ('hl.fl', '*'),
      ('hl.snippets', 5),
      ('hl.fragsize', 1000),
    )

    # Build the sort clause from the selected fields' sort directions.
    # NOTE(review): filter(...)[0] relies on Python 2's list-returning filter.
    if collection['template']['fieldsSelected']:
      fields = []
      for field in collection['template']['fieldsSelected']:
        attribute_field = filter(lambda attribute: field == attribute['name'], collection['template']['fieldsAttributes'])
        if attribute_field:
          if attribute_field[0]['sort']['direction']:
            fields.append('%s %s' % (field, attribute_field[0]['sort']['direction']))
      if fields:
        params += (
          ('sort', ','.join(fields)),
        )

    response = self._root.get('%(collection)s/select' % solr_query, params)
    return self._get_json(response)

  def _n_facet_dimension(self, widget, _f, facets, dim, timeFilter):
    """
    Recursively translate one facet dimension (facets[0]) plus its children
    into a JSON Facet API structure inside _f.

    'count' dimensions become terms (or range) facets named 'dim_NN:field';
    other aggregates become 'agg_NN_MM:func' entries. Recursion walks the
    remaining facets to build dimension n+1.
    """
    facet = facets[0]
    f_name = 'dim_%02d:%s' % (dim, facet['field'])

    if facet['aggregate']['function'] == 'count':
      if 'facet' not in _f:
        _f['facet'] = {f_name: {}}
      else:
        _f['facet'][f_name] = {}
      _f = _f['facet']

      # Same sort-resolution rule as in query(): last non-default aggregate wins.
      sort = {'count': facet['sort']}
      for i, agg in enumerate(self._get_dimension_aggregates(facets)):
        if agg['sort'] != 'default':
          agg_function = self._get_aggregate_function(agg)
          sort = {'agg_%02d_%02d:%s' % (dim, i, agg_function): agg['sort']}
      if sort.get('count') == 'default':
        sort['count'] = 'desc'

      _f[f_name] = {
          'type': 'terms',
          'field': '%(field)s' % facet,
          'limit': int(facet.get('limit', 10)),
          'mincount': int(facet['mincount']),
          'numBuckets': True,
          'allBuckets': True,
          'sort': sort,
          'missing': facet.get('missing', False)
          #'prefix': '' # Forbidden on numeric fields
      }
      # A 'start' property switches this dimension from terms to range.
      if 'start' in facet and not facet.get('type') == 'field':
        _f[f_name].update({
            'type': 'range',
            'start': facet['start'],
            'end': facet['end'],
            'gap': facet['gap']
        })

        # Only on dim 1 currently
        if timeFilter and timeFilter['time_field'] == facet['field'] and (widget['id'] not in timeFilter['time_filter_overrides']): # or facet['widgetType'] != 'bucket-widget'):
          facet['widgetType'] = widget['widgetType']
          _f[f_name].update(self._get_time_filter_query(timeFilter, facet))

      if widget['widgetType'] == 'tree2-widget' and facets[-1]['aggregate']['function'] != 'count':
        _f['subcount'] = self._get_aggregate_function(facets[-1])

      if len(facets) > 1: # Get n+1 dimension
        if facets[1]['aggregate']['function'] == 'count':
          self._n_facet_dimension(widget, _f[f_name], facets[1:], dim + 1, timeFilter)
        else:
          self._n_facet_dimension(widget, _f[f_name], facets[1:], dim, timeFilter)
    else:
      # Leading aggregate dimension: emit aggregates until the next 'count'
      # facet starts a new dimension.
      agg_function = self._get_aggregate_function(facet)
      _f['facet'] = {
          'agg_%02d_00:%s' % (dim, agg_function): agg_function
      }
      for i, _f_agg in enumerate(facets[1:], 1):
        if _f_agg['aggregate']['function'] != 'count':
          agg_function = self._get_aggregate_function(_f_agg)
          _f['facet']['agg_%02d_%02d:%s' % (dim, i, agg_function)] = agg_function
        else:
          self._n_facet_dimension(widget, _f, facets[i:], dim + 1, timeFilter) # Get n+1 dimension
          break

  def select(self, collection, query=None, rows=100, start=0):
    """Run a plain /select on `collection` and return the parsed JSON."""
    if query is None:
      query = EMPTY_QUERY.get()

    params = self._get_params() + (
        ('q', query),
        ('wt', 'json'),
        ('rows', rows),
        ('start', start),
    )

    response = self._root.get('%s/select' % collection, params)
    return self._get_json(response)

  def suggest(self, collection, query):
    """Query the suggester for `query['q']`, optionally against a named dictionary."""
    try:
      params = self._get_params() + (
          ('suggest', 'true'),
          ('suggest.build', 'true'),
          ('suggest.q', query['q']),
          ('wt', 'json'),
      )
      if query.get('dictionary'):
        params += (
            ('suggest.dictionary', query['dictionary']),
        )
      response = self._root.get('%s/suggest' % collection, params)
      return self._get_json(response)
    except RestException, e:
      raise PopupException(e, title=_('Error while accessing Solr'))
class ResourceManagerApi(object): def __init__(self, rm_url, security_enabled=False, ssl_cert_ca_verify=False): self._url = posixpath.join(rm_url, 'ws', _API_VERSION) self._client = HttpClient(self._url, logger=LOG) self._root = Resource(self._client) self._security_enabled = security_enabled self._thread_local = threading.local() # To store user info self.from_failover = False if self._security_enabled: self._client.set_kerberos_auth() self._client.set_verify(ssl_cert_ca_verify) def _get_params(self): params = {} if self.username != DEFAULT_USER.get(): # We impersonate if needed params['doAs'] = self.username if not self.security_enabled: params['user.name'] = DEFAULT_USER.get() return params def __str__(self): return "ResourceManagerApi at %s" % (self._url,) def setuser(self, user): curr = self.user self._thread_local.user = user return curr @property def user(self): return self.username # Backward compatibility @property def username(self): try: return self._thread_local.user except AttributeError: return DEFAULT_USER.get() @property def url(self): return self._url @property def security_enabled(self): return self._security_enabled def cluster(self, **kwargs): params = self._get_params() return self._execute(self._root.get, 'cluster/info', params=params, headers={'Accept': _JSON_CONTENT_TYPE}) def apps(self, **kwargs): params = self._get_params() params.update(kwargs) return self._execute(self._root.get, 'cluster/apps', params=params, headers={'Accept': _JSON_CONTENT_TYPE}) def app(self, app_id): params = self._get_params() return self._execute(self._root.get, 'cluster/apps/%(app_id)s' % {'app_id': app_id}, params=params, headers={'Accept': _JSON_CONTENT_TYPE}) def appattempts(self, app_id): params = self._get_params() return self._execute(self._root.get, 'cluster/apps/%(app_id)s/appattempts' % {'app_id': app_id}, params=params, headers={'Accept': _JSON_CONTENT_TYPE}) def appattempts_attempt(self, app_id, attempt_id): attempts = self.appattempts(app_id) for 
attempt in attempts['appAttempts']['appAttempt']: if attempt['id'] == attempt_id: return attempt raise PopupException('Application {} does not have application attempt with id {}'.format(app_id, attempt_id)) def kill(self, app_id): data = {'state': 'KILLED'} token = None # Tokens are managed within the kill method but should be moved out when not alpha anymore or we support submitting an app. if self.security_enabled and False: full_token = self.delegation_token() if 'token' not in full_token: raise PopupException(_('YARN did not return any token field.'), detail=smart_str(full_token)) data['X-Hadoop-Delegation-Token'] = token = full_token.pop('token') LOG.debug('Received delegation token %s' % full_token) try: params = self._get_params() return self._execute(self._root.put, 'cluster/apps/%(app_id)s/state' % {'app_id': app_id}, params=params, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE) finally: if token: self.cancel_token(token) def delegation_token(self): params = self._get_params() data = {'renewer': self.username} return self._execute(self._root.post, 'cluster/delegation-token', params=params, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE) def cancel_token(self, token): params = self._get_params() headers = {'Hadoop-YARN-RM-Delegation-Token': token} LOG.debug('Canceling delegation token of ' % self.username) return self._execute(self._root.delete, 'cluster/delegation-token', params=params, headers=headers) def _execute(self, function, *args, **kwargs): response = None try: response = function(*args, **kwargs) except Exception, e: raise PopupException(_('YARN RM returned a failed response: %s') % e) return response
class LivyClient(object):
  """Thin REST wrapper over a Livy server: interactive sessions, statements and batches."""

  def __init__(self, livy_url):
    self._url = posixpath.join(livy_url)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = SECURITY_ENABLED.get()
    self._csrf_enabled = CSRF_ENABLED.get()
    self._thread_local = threading.local()

    if self.security_enabled:
      self._client.set_kerberos_auth()

    if self.csrf_enabled:
      self._client.set_headers({'X-Requested-By': 'hue'})

    self._client.set_verify(SSL_CERT_CA_VERIFY.get())

  def __str__(self):
    return "LivyClient at %s" % (self._url, )

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  @property
  def csrf_enabled(self):
    return self._csrf_enabled

  @property
  def user(self):
    # Raises AttributeError until setuser() has been called on this thread.
    return self._thread_local.user

  def setuser(self, user):
    """Remember the effective user (accepts a User object or a plain username)."""
    self._thread_local.user = user.username if hasattr(user, 'username') else user

  def _read_log(self, path, startFrom, size):
    # Shared paging logic for session and batch logs.
    options = {}
    if startFrom is not None:
      options['from'] = startFrom
    if size is not None:
      options['size'] = size
    chunk = self._root.get(path, params=options)
    return '\n'.join(chunk['log'])

  def get_status(self):
    return self._root.get('sessions')

  def get_log(self, uuid, startFrom=None, size=None):
    return self._read_log('sessions/%s/log' % uuid, startFrom, size)

  def create_session(self, **properties):
    # Strip any Kerberos realm from the username before impersonating.
    properties['proxyUser'] = self.user.split('@')[0]
    if has_connectors():  # Only SQL supported via connectors currently
      properties['kind'] = 'sql'
    return self._root.post('sessions', data=json.dumps(properties), contenttype=_JSON_CONTENT_TYPE)

  def get_sessions(self):
    return self._root.get('sessions')

  def get_session(self, uuid):
    return self._root.get('sessions/%s' % uuid)

  def get_statements(self, uuid):
    return self._root.get('sessions/%s/statements' % uuid)

  def submit_statement(self, uuid, statement):
    payload = json.dumps({'code': statement})
    return self._root.post('sessions/%s/statements' % uuid, data=payload, contenttype=_JSON_CONTENT_TYPE)

  def inspect(self, uuid, statement):
    payload = json.dumps({'code': statement})
    return self._root.post('sessions/%s/inspect' % uuid, data=payload, contenttype=_JSON_CONTENT_TYPE)

  def fetch_data(self, session, statement):
    return self._root.get('sessions/%s/statements/%s' % (session, statement))

  def cancel(self, session):
    return self._root.post('sessions/%s/interrupt' % session)

  def close(self, uuid):
    return self._root.delete('sessions/%s' % uuid)

  def get_batches(self):
    return self._root.get('batches')

  def submit_batch(self, properties):
    properties['proxyUser'] = self.user
    return self._root.post('batches', data=json.dumps(properties), contenttype=_JSON_CONTENT_TYPE)

  def get_batch(self, uuid):
    return self._root.get('batches/%s' % uuid)

  def get_batch_status(self, uuid):
    return self._root.get('batches/%s/state' % uuid)['state']

  def get_batch_log(self, uuid, startFrom=None, size=None):
    return self._read_log('batches/%s/log' % uuid, startFrom, size)

  def close_batch(self, uuid):
    return self._root.delete('batches/%s' % uuid)
class HistoryServerApi(object):
  """REST client for the MapReduce Job History Server ('ws/<version>/history/mapreduce/...')."""

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    # NOTE(review): the parameter is named oozie_url but is used as the
    # History Server base URL — presumably a legacy name; confirm at call sites.
    self._url = posixpath.join(oozie_url, 'ws/%s/history' % _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    self._thread_local = threading.local()  # To store user info

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "HistoryServerApi at %s" % (self._url, )

  def _get_params(self):
    """Common query parameters: impersonate via doAs and, without Kerberos, send user.name."""
    params = {}

    if self.username != DEFAULT_USER.get():  # We impersonate if needed
      params['doAs'] = self.username
      if not self._security_enabled:
        params['user.name'] = DEFAULT_USER.get()

    return params

  @property
  def url(self):
    return self._url

  @property
  def user(self):
    return self.username  # Backward compatibility

  @property
  def username(self):
    # Per-thread effective user, falling back to the configured default when
    # setuser() was never called on this thread.
    try:
      return self._thread_local.user
    except AttributeError:
      return DEFAULT_USER.get()

  def setuser(self, user):
    """Set the per-thread effective user; returns the previous value."""
    curr = self.user
    self._thread_local.user = user
    return curr

  def job(self, user, job_id):
    # The user argument is unused; the effective user comes from _get_params().
    return self._root.get('mapreduce/jobs/%(job_id)s' % {'job_id': job_id}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def counters(self, job_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/counters' % {'job_id': job_id}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def conf(self, job_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/conf' % {'job_id': job_id}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def job_attempts(self, job_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/jobattempts' % {'job_id': job_id}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def tasks(self, job_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/tasks' % {'job_id': job_id}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task(self, job_id, task_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s' % {
        'job_id': job_id, 'task_id': task_id
      }, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempts(self, job_id, task_id):
    return self._root.get(
      'mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts' % {
        'job_id': job_id, 'task_id': task_id
      }, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_counters(self, job_id, task_id):
    # Accept application_* ids by rewriting them into the job_* form.
    job_id = job_id.replace('application', 'job')
    return self._root.get(
      'mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/counters' % {
        'job_id': job_id, 'task_id': task_id
      }, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempt(self, job_id, task_id, attempt_id):
    return self._root.get(
      'mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s' % {
        'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id
      }, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempt_counters(self, job_id, task_id, attempt_id):
    return self._root.get(
      'mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s/counters' % {
        'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id
      }, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})