class SparkHistoryServerApi(object):
  """Thin REST client for the Spark History Server API."""

  def __init__(self, spark_hs_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._ui_url = spark_hs_url
    self._url = posixpath.join(spark_hs_url, 'api/%s/' % _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if security_enabled:
      self._client.set_kerberos_auth()
    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "Spark History Server API at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def ui_url(self):
    return self._ui_url

  @property
  def headers(self):
    # Every endpoint of this API is consumed as JSON.
    return {'Accept': _JSON_CONTENT_TYPE}

  def applications(self):
    return self._root.get('applications', headers=self.headers)

  def application(self, app_id):
    return self._root.get('applications/%(app_id)s' % {'app_id': app_id}, headers=self.headers)

  def jobs(self, app_id, attempt_id):
    return self._attempt_get(app_id, attempt_id, 'jobs')

  def stages(self, app_id, attempt_id):
    return self._attempt_get(app_id, attempt_id, 'stages')

  def executors(self, app_id, attempt_id):
    return self._attempt_get(app_id, attempt_id, 'executors')

  def _attempt_get(self, app_id, attempt_id, entity):
    # The per-attempt endpoints differ only by their trailing path segment.
    path = 'applications/%(app_id)s/%(attempt_id)s/%(entity)s' % {
      'app_id': app_id, 'attempt_id': attempt_id, 'entity': entity
    }
    return self._root.get(path, headers=self.headers)
class OptimizerApi(object):
  """Client for the Optimizer product API."""

  def __init__(self, api_url=None, product_name=None, product_secret=None,
               ssl_cert_ca_verify=OPTIMIZER.SSL_CERT_CA_VERIFY.get()):
    self._api_url = (api_url or OPTIMIZER.API_URL.get()).strip('/')
    self._product_name = product_name if product_name else OPTIMIZER.PRODUCT_NAME.get()
    self._product_secret = product_secret if product_secret else OPTIMIZER.PRODUCT_SECRET.get()

    self._client = HttpClient(self._api_url, logger=LOG)
    self._client.set_verify(ssl_cert_ca_verify)
    self._root = resource.Resource(self._client)

  def create_product(self, product_name, product_secret):
    """Register a product with the Optimizer service.

    Raises PopupException on any REST error.
    """
    try:
      data = {
        'productName': product_name,
        'productSecret': product_secret,
        'authCode': ''
      }
      return self._root.post('/api/createProduct', data)
    except RestException as e:  # Fixed: `except RestException, e` is Python-2-only syntax
      raise PopupException(e, title=_('Error while accessing Optimizer'))
class ResourceManagerApi(object):
  """REST client for the YARN ResourceManager, detecting standby-RM responses."""

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if security_enabled:
      self._client.set_kerberos_auth()
    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "ResourceManagerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def cluster(self, **kwargs):
    return self._execute(self._root.get, 'cluster', params=kwargs, headers={'Accept': _JSON_CONTENT_TYPE})

  def apps(self, **kwargs):
    return self._execute(self._root.get, 'cluster/apps', params=kwargs, headers={'Accept': _JSON_CONTENT_TYPE})

  def app(self, app_id):
    path = 'cluster/apps/%(app_id)s' % {'app_id': app_id}
    return self._execute(self._root.get, path, headers={'Accept': _JSON_CONTENT_TYPE})

  def kill(self, app_id):
    path = 'cluster/apps/%(app_id)s/state' % {'app_id': app_id}
    payload = json.dumps({'state': 'KILLED'})
    return self._execute(self._root.put, path, data=payload, contenttype=_JSON_CONTENT_TYPE)

  def _execute(self, function, *args, **kwargs):
    # YARN-2605: Yarn does not use proper HTTP redirects when the standby RM has
    # failed back to the master RM; the body carries a plain-text banner instead.
    result = function(*args, **kwargs)
    if isinstance(result, str) and result.startswith('This is standby RM. Redirecting to the current active RM'):
      raise YarnFailoverOccurred(result)
    return result
class THttpClient(TTransportBase):
  """
  HTTP transport mode for Thrift.

  HTTPS and Kerberos support with Request.

  e.g.
  mode = THttpClient('http://hbase-thrift-v1.com:9090')
  mode = THttpClient('http://hive-localhost:10001/cliservice')
  """

  def __init__(self, base_url):
    self._base_url = base_url
    self._client = HttpClient(self._base_url, logger=LOG)
    self._data = None
    self._headers = None
    self._wbuf = buffer_writer()

  def open(self):
    # Nothing to acquire: the underlying HttpClient is connectionless.
    pass

  def set_kerberos_auth(self, service="HTTP"):
    self._client.set_kerberos_auth(service=service)

  def set_basic_auth(self, username, password):
    self._client.set_basic_auth(username, password)

  def set_bearer_auth(self, token):
    self._client.set_bearer_auth(token)

  def set_verify(self, verify=True):
    self._client.set_verify(verify)

  def close(self):
    self._headers = None  # Close session too?

  def isOpen(self):
    return self._client is not None

  def setTimeout(self, ms):
    # The timeout travels as a request header, expressed in whole seconds.
    if not self._headers:
      self._headers = {}
    self._headers['timeout'] = str(int(ms / 1000))

  def setCustomHeaders(self, headers):
    self._headers = headers

  def read(self, sz):
    return self._data

  def write(self, buf):
    self._wbuf.write(buf)

  def flush(self):
    # Drain the write buffer, reset it, then POST the payload in one shot.
    payload = self._wbuf.getvalue()
    self._wbuf = buffer_writer()
    self._root = Resource(self._client)
    self._data = self._root.post('', data=payload, headers=self._headers)
class NodeManagerApi(object):
  """REST client for a YARN NodeManager's container endpoints."""

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=True):
    self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if security_enabled:
      self._client.set_kerberos_auth()
    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "NodeManagerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def containers(self):
    return self._root.get('node/containers', headers={'Accept': _JSON_CONTENT_TYPE})

  def container(self, container_id):
    path = 'node/containers/%(container_id)s' % {'container_id': container_id}
    return self._root.get(path, headers={'Accept': _JSON_CONTENT_TYPE})
class PrometheusApi(object):
  """Client for the Prometheus HTTP query API (instant and range queries)."""

  def __init__(self, user=None, ssl_cert_ca_verify=False):
    self._api_url = '%s/%s' % (PROMETHEUS.API_URL.get().strip('/'), VERSION)
    self.user = user
    self._client = HttpClient(self._api_url, logger=LOG)
    self._client.set_verify(ssl_cert_ca_verify)
    self._root = Resource(self._client)

  def query(self, query):
    # Instant query, evaluated at the current server time.
    try:
      response = self._root.get('query', {'query': query})
      return response['data']
    except RestException as e:
      raise PrometheusApiException(e)

  def range_query(self, query, start, end, step):
    # e.g. /api/v1/query_range?query=up&start=2015-07-01T20:10:30.781Z&end=2015-07-01T20:11:00.781Z&step=15s
    params = {'query': query, 'start': start, 'end': end, 'step': step}
    try:
      return self._root.get('query_range', params)['data']
    except RestException as e:
      raise PrometheusApiException(e)
class ManagerApi(object):
  """
  https://cloudera.github.io/cm_api/
  """

  def __init__(self, user=None, security_enabled=False, ssl_cert_ca_verify=False):
    self._api_url = '%s/%s' % (MANAGER.API_URL.get().strip('/'), VERSION)
    self._username = get_navigator_auth_username()
    self._password = get_navigator_auth_password()
    self.user = user
    self._client = HttpClient(self._api_url, logger=LOG)

    if security_enabled:
      self._client.set_kerberos_auth()
    else:
      self._client.set_basic_auth(self._username, self._password)

    self._client.set_verify(ssl_cert_ca_verify)
    self._root = Resource(self._client)

  def has_service(self, service_name, cluster_name=None):
    """Return True when `service_name` is among the cluster's service types.

    Raises ManagerApiException on REST errors.
    """
    # NOTE(review): `_get_cluster` is defined elsewhere in this module.
    cluster = self._get_cluster(cluster_name)
    try:
      services = self._root.get('clusters/%(cluster_name)s/serviceTypes' % {
        'cluster_name': cluster['name'],
        'service_name': service_name
      })['items']
      return service_name in services
    except RestException as e:  # Fixed: `except RestException, e` is Python-2-only syntax
      raise ManagerApiException(e)
class OptimizerApi(object):
  """Client for the Optimizer API, authenticating with a product name/secret."""

  def __init__(self, api_url=None, product_name=None, product_secret=None,
               ssl_cert_ca_verify=OPTIMIZER.SSL_CERT_CA_VERIFY.get(), product_auth_secret=None):
    self._api_url = (api_url or OPTIMIZER.API_URL.get()).strip('/')
    self._product_name = product_name if product_name else OPTIMIZER.PRODUCT_NAME.get()
    self._product_secret = product_secret if product_secret else OPTIMIZER.PRODUCT_SECRET.get()
    self._product_auth_secret = product_auth_secret if product_auth_secret else OPTIMIZER.PRODUCT_AUTH_SECRET.get()
    self._email = OPTIMIZER.EMAIL.get()
    self._email_password = OPTIMIZER.EMAIL_PASSWORD.get()

    self._client = HttpClient(self._api_url, logger=LOG)
    self._client.set_verify(ssl_cert_ca_verify)
    self._root = resource.Resource(self._client)

  def create_product(self, product_name=None, product_secret=None, authCode=None):
    """Create a product, defaulting unspecified fields to the configured values.

    Raises PopupException on any REST error.
    """
    try:
      data = {
        'productName': product_name if product_name is not None else self._product_name,
        'productSecret': product_secret if product_secret is not None else self._product_secret,
        'authCode': authCode if authCode is not None else self._product_auth_secret
      }
      return self._root.post('/api/createProduct', data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE)
    except RestException as e:  # Fixed: `except RestException, e` is Python-2-only syntax
      raise PopupException(e, title=_('Error while accessing Optimizer'))
class NodeManagerApi(object):
  """Talks to the YARN NodeManager web services (container listing/details)."""

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=True):
    self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    if self._security_enabled:
      self._client.set_kerberos_auth()
    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "NodeManagerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def _json_get(self, path):
    # All NodeManager endpoints are requested as JSON.
    return self._root.get(path, headers={'Accept': _JSON_CONTENT_TYPE})

  def containers(self):
    return self._json_get('node/containers')

  def container(self, container_id):
    return self._json_get('node/containers/%(container_id)s' % {'container_id': container_id})
class ManagerApi(object):
  """
  https://cloudera.github.io/cm_api/
  """

  def __init__(self, user=None, security_enabled=False, ssl_cert_ca_verify=False):
    self._api_url = '%s/%s' % (MANAGER.API_URL.get().strip('/'), VERSION)
    self._username = get_navigator_auth_username()
    self._password = get_navigator_auth_password()
    self.user = user
    self._client = HttpClient(self._api_url, logger=LOG)

    if security_enabled:
      self._client.set_kerberos_auth()
    else:
      self._client.set_basic_auth(self._username, self._password)

    self._client.set_verify(ssl_cert_ca_verify)
    self._root = Resource(self._client)

  def tools_echo(self):
    """Ping Cloudera Manager via its echo endpoint.

    Raises ManagerApiException on REST errors.
    """
    try:
      params = (('message', 'hello'),)
      LOG.info(params)
      return self._root.get('tools/echo', params=params)
    except RestException as e:  # Fixed: `except RestException, e` is Python-2-only syntax
      raise ManagerApiException(e)
class ResourceManagerApi(object):
  """REST client for the YARN ResourceManager web services."""

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    self._ssl_cert_ca_verify = ssl_cert_ca_verify

    if self._security_enabled:
      self._client.set_kerberos_auth()

    # BUG FIX: previously `if ssl_cert_ca_verify: self._client.set_verify(True)`,
    # so ssl_cert_ca_verify=False was silently ignored and the client kept its
    # default. Pass the flag through unconditionally, consistent with the other
    # API clients in this module.
    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "ResourceManagerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def cluster(self, **kwargs):
    return self._root.get('cluster', params=kwargs, headers={'Accept': _JSON_CONTENT_TYPE})

  def apps(self, **kwargs):
    return self._root.get('cluster/apps', params=kwargs, headers={'Accept': _JSON_CONTENT_TYPE})

  def app(self, app_id):
    return self._root.get('cluster/apps/%(app_id)s' % {'app_id': app_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def kill(self, app_id):
    return self._root.put('cluster/apps/%(app_id)s/state' % {'app_id': app_id},
                          data=json.dumps({'state': 'KILLED'}), contenttype=_JSON_CONTENT_TYPE)
class ResourceManagerApi(object):
  """YARN ResourceManager REST API wrapper (cluster info, apps, kill)."""

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    if security_enabled:
      self._client.set_kerberos_auth()
    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "ResourceManagerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def _accept_json(self):
    # Fresh dict per call so callers cannot mutate shared state.
    return {'Accept': _JSON_CONTENT_TYPE}

  def cluster(self, **kwargs):
    return self._root.get('cluster', params=kwargs, headers=self._accept_json())

  def apps(self, **kwargs):
    return self._root.get('cluster/apps', params=kwargs, headers=self._accept_json())

  def app(self, app_id):
    return self._root.get('cluster/apps/%(app_id)s' % {'app_id': app_id}, headers=self._accept_json())

  def kill(self, app_id):
    body = json.dumps({'state': 'KILLED'})
    return self._root.put('cluster/apps/%(app_id)s/state' % {'app_id': app_id}, data=body,
                          contenttype=_JSON_CONTENT_TYPE)
class OptimizerApi(object):
  """Optimizer client with a lazily fetched, cached auth token."""

  def __init__(self, api_url=None, product_name=None, product_secret=None,
               ssl_cert_ca_verify=OPTIMIZER.SSL_CERT_CA_VERIFY.get(), product_auth_secret=None):
    self._api_url = (api_url or get_optimizer_url()).strip('/')
    self._product_name = product_name if product_name else OPTIMIZER.PRODUCT_NAME.get()
    self._product_secret = product_secret if product_secret else OPTIMIZER.PRODUCT_SECRET.get()
    self._product_auth_secret = product_auth_secret if product_auth_secret else OPTIMIZER.PRODUCT_AUTH_SECRET.get()
    self._email = OPTIMIZER.EMAIL.get()
    self._email_password = OPTIMIZER.EMAIL_PASSWORD.get()

    self._client = HttpClient(self._api_url, logger=LOG)
    self._client.set_verify(ssl_cert_ca_verify)
    self._root = resource.Resource(self._client)
    self._token = None

  def _authenticate(self, force=False):
    # Cache the token; re-fetch only when absent or explicitly forced.
    if self._token is None or force:
      self._token = self.authenticate()['token']
    return self._token

  def create_product(self, product_name=None, product_secret=None, authCode=None):
    """Create a product, defaulting unspecified fields to the configured values.

    Raises PopupException on any REST error.
    """
    try:
      data = {
        'productName': product_name if product_name is not None else self._product_name,
        'productSecret': product_secret if product_secret is not None else self._product_secret,
        'authCode': authCode if authCode is not None else self._product_auth_secret
      }
      return self._root.post('/api/createProduct', data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE)
    except RestException as e:  # Fixed: `except RestException, e` is Python-2-only syntax
      raise PopupException(e, title=_('Error while accessing Optimizer'))
class THttpClient(TTransportBase):
  """
  HTTP transport mode for Thrift.

  HTTPS and Kerberos support with Request.

  e.g.
  mode = THttpClient('http://hbase-thrift-v1.com:9090')
  mode = THttpClient('http://hive-localhost:10001/cliservice')
  """

  def __init__(self, base_url):
    self._base_url = base_url
    self._client = HttpClient(self._base_url, logger=LOG)
    self._data = None
    self._headers = None
    self._wbuf = StringIO()

  def open(self):
    # Nothing to acquire: the underlying HttpClient is connectionless.
    pass

  def set_kerberos_auth(self):
    self._client.set_kerberos_auth()

  def set_basic_auth(self, username, password):
    self._client.set_basic_auth(username, password)

  def set_verify(self, verify=True):
    self._client.set_verify(verify)

  def close(self):
    self._headers = None  # Close session too?

  def isOpen(self):
    return self._client is not None

  def setTimeout(self, ms):
    # Timeouts are not supported by this transport variant.
    pass

  def setCustomHeaders(self, headers):
    self._headers = headers

  def read(self, sz):
    return self._data

  def write(self, buf):
    self._wbuf.write(buf)

  def flush(self):
    # Drain the write buffer, reset it, then POST the payload in one shot.
    payload = self._wbuf.getvalue()
    self._wbuf = StringIO()
    self._root = Resource(self._client)
    self._data = self._root.post('', data=payload, headers=self._headers)
class OptimizerApi(object):
  """Optimizer client that shells out to the `ccs navopt` CLI for some calls."""

  def __init__(self, api_url=None, product_name=None, product_secret=None,
               ssl_cert_ca_verify=OPTIMIZER.SSL_CERT_CA_VERIFY.get(), product_auth_secret=None):
    self._api_url = (api_url or get_optimizer_url()).strip('/')
    self._email = OPTIMIZER.EMAIL.get()
    self._email_password = OPTIMIZER.EMAIL_PASSWORD.get()
    self._product_secret = product_secret if product_secret else OPTIMIZER.PRODUCT_SECRET.get()
    self._product_auth_secret = product_auth_secret if product_auth_secret else OPTIMIZER.PRODUCT_AUTH_SECRET.get()
    self._product_name = product_name if product_name else (
      OPTIMIZER.PRODUCT_NAME.get() or self.get_tenant()['tenant']
    )  # Aka "tenant"

    self._client = HttpClient(self._api_url, logger=LOG)
    self._client.set_verify(ssl_cert_ca_verify)
    self._root = resource.Resource(self._client)
    self._token = None

  def _authenticate(self, force=False):
    # Cache the auth token; re-fetch only when absent or explicitly forced.
    if self._token is None or force:
      self._token = self.authenticate()['token']
    return self._token

  def _exec(self, command, args):
    """Run a `ccs navopt` subcommand.

    NOTE(review): `response` is initialized but never used in the visible
    code, and `data` is not returned — this block looks truncated upstream;
    behavior kept as-is. TODO confirm against the full source.
    """
    data = None
    response = {'status': 'error'}

    try:
      cmd_args = ['ccs', 'navopt', '--endpoint-url=%s' % self._api_url, command]
      if self._product_secret:
        cmd_args += ['--auth-config', self._product_secret]

      data = subprocess.check_output(cmd_args + args)
    except CalledProcessError as e:  # Fixed: `except CalledProcessError, e` is Python-2-only syntax
      if command == 'upload' and e.returncode == 1:
        LOG.info('Upload command is successful despite return code of 1: %s' % e.output)
        data = '\n'.join(e.output.split('\n')[3:])  # Beware removing of {"url":...}
    except RestException as e:  # Fixed: `except RestException, e` is Python-2-only syntax
      raise OptimizerApiException(e, title=_('Error while accessing Optimizer'))
class HistoryServerApi(object):
  """REST client for the MapReduce Job History Server."""

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(oozie_url, 'ws/%s/history' % _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    if security_enabled:
      self._client.set_kerberos_auth()
    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "HistoryServerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  def _get(self, path):
    # Every endpoint below is fetched with a JSON Accept header.
    return self._root.get(path, headers={'Accept': _JSON_CONTENT_TYPE})

  def job(self, user, job_id):
    # `user` is accepted for interface compatibility but not used here.
    return self._get('mapreduce/jobs/%(job_id)s' % {'job_id': job_id})

  def counters(self, job_id):
    return self._get('mapreduce/jobs/%(job_id)s/counters' % {'job_id': job_id})

  def conf(self, job_id):
    return self._get('mapreduce/jobs/%(job_id)s/conf' % {'job_id': job_id})

  def job_attempts(self, job_id):
    return self._get('mapreduce/jobs/%(job_id)s/jobattempts' % {'job_id': job_id})

  def tasks(self, job_id):
    return self._get('mapreduce/jobs/%(job_id)s/tasks' % {'job_id': job_id})

  def task(self, job_id, task_id):
    return self._get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s' % {'job_id': job_id, 'task_id': task_id})

  def task_attempts(self, job_id, task_id):
    return self._get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts' % {
      'job_id': job_id, 'task_id': task_id})

  def task_counters(self, job_id, task_id):
    # Normalize an application-style id into a job-style id first.
    job_id = job_id.replace('application', 'job')
    return self._get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/counters' % {
      'job_id': job_id, 'task_id': task_id})

  def task_attempt(self, job_id, task_id, attempt_id):
    return self._get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s' % {
      'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id})

  def task_attempt_counters(self, job_id, task_id, attempt_id):
    return self._get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s/counters' % {
      'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id})
def _create_query_store_client(request, content_type='application/json; charset=UTF-8'):
  """Build an HttpClient for the Query Store, impersonating the request user."""
  client = HttpClient(QUERY_STORE.SERVER_URL.get())
  client.set_headers({
    'x-do-as': request.user.username,
    'X-Requested-By': 'das',
    'Content-Type': content_type,
    'Cookie': request.environ.get('HTTP_COOKIE'),
  })
  client.set_verify(False)

  if USE_SASL.get():
    client.set_kerberos_auth()

  return client
class ResourceManagerApi(object):
  """ResourceManager REST API; `_execute` guards against standby-RM responses."""

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    if self._security_enabled:
      self._client.set_kerberos_auth()
    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "ResourceManagerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def cluster(self, **kwargs):
    return self._execute(self._root.get, 'cluster', params=kwargs, headers={'Accept': _JSON_CONTENT_TYPE})

  def apps(self, **kwargs):
    return self._execute(self._root.get, 'cluster/apps', params=kwargs, headers={'Accept': _JSON_CONTENT_TYPE})

  def app(self, app_id):
    return self._execute(self._root.get, 'cluster/apps/%(app_id)s' % {'app_id': app_id},
                         headers={'Accept': _JSON_CONTENT_TYPE})

  def kill(self, app_id):
    return self._execute(self._root.put, 'cluster/apps/%(app_id)s/state' % {'app_id': app_id},
                         data=json.dumps({'state': 'KILLED'}), contenttype=_JSON_CONTENT_TYPE)

  def _execute(self, function, *args, **kwargs):
    response = function(*args, **kwargs)

    # YARN-2605: Yarn does not use proper HTTP redirects when the standby RM has
    # failed back to the master RM; the body carries a plain-text banner instead.
    is_standby_banner = (
      isinstance(response, str) and
      response.startswith('This is standby RM. Redirecting to the current active RM')
    )
    if is_standby_banner:
      raise YarnFailoverOccurred(response)

    return response
class SparkHistoryServerApi(object):
  """Wrapper around the Spark History Server's REST endpoints."""

  def __init__(self, spark_hs_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._ui_url = spark_hs_url
    self._url = posixpath.join(spark_hs_url, 'api/%s/' % _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    if self._security_enabled:
      self._client.set_kerberos_auth()
    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "Spark History Server API at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def ui_url(self):
    return self._ui_url

  @property
  def headers(self):
    # Everything this API serves is consumed as JSON.
    return {'Accept': _JSON_CONTENT_TYPE}

  def applications(self):
    return self._root.get('applications', headers=self.headers)

  def application(self, app_id):
    return self._root.get('applications/%(app_id)s' % {'app_id': app_id}, headers=self.headers)

  def jobs(self, app_id, attempt_id):
    values = {'app_id': app_id, 'attempt_id': attempt_id}
    return self._root.get('applications/%(app_id)s/%(attempt_id)s/jobs' % values, headers=self.headers)

  def stages(self, app_id, attempt_id):
    values = {'app_id': app_id, 'attempt_id': attempt_id}
    return self._root.get('applications/%(app_id)s/%(attempt_id)s/stages' % values, headers=self.headers)

  def executors(self, app_id, attempt_id):
    values = {'app_id': app_id, 'attempt_id': attempt_id}
    return self._root.get('applications/%(app_id)s/%(attempt_id)s/executors' % values, headers=self.headers)
class PrometheusApi(object):
  """Client for the Prometheus HTTP API (instant queries)."""

  def __init__(self, user=None, ssl_cert_ca_verify=False):
    self._api_url = '%s/%s' % (PROMETHEUS.API_URL.get().strip('/'), VERSION)
    self.user = user
    self._client = HttpClient(self._api_url, logger=LOG)
    self._client.set_verify(ssl_cert_ca_verify)
    self._root = Resource(self._client)

  def query(self, query):
    """Run an instant query and return the response's 'data' payload.

    Raises PrometheusApiException on REST errors.
    """
    try:
      return self._root.get('query', {
        'query': query,
      })['data']
    except RestException as e:  # Fixed: `except RestException, e` is Python-2-only syntax
      raise PrometheusApiException(e)
class OptimizerApi(object):
  """REST client for the Optimizer product-management API."""

  def __init__(self, api_url=None, product_name=None, product_secret=None,
               ssl_cert_ca_verify=OPTIMIZER.SSL_CERT_CA_VERIFY.get()):
    self._api_url = (api_url or OPTIMIZER.API_URL.get()).strip('/')
    self._product_name = product_name if product_name else OPTIMIZER.PRODUCT_NAME.get()
    self._product_secret = product_secret if product_secret else OPTIMIZER.PRODUCT_SECRET.get()

    self._client = HttpClient(self._api_url, logger=LOG)
    self._client.set_verify(ssl_cert_ca_verify)
    self._root = resource.Resource(self._client)

  def create_product(self, product_name, product_secret):
    """Register a product with the Optimizer service.

    Raises PopupException on any REST error.
    """
    try:
      data = {
        'productName': product_name,
        'productSecret': product_secret,
        'authCode': ''
      }
      return self._root.post('/api/createProduct', data)
    except RestException as e:  # Fixed: `except RestException, e` is Python-2-only syntax
      raise PopupException(e, title=_('Error while accessing Optimizer'))
class SparkJob(Application):
  # NOTE(review): this block appears truncated in the source — the trailing
  # `finally:` in `_resolve_tracking_url` has no body. Documented as-is; do
  # not apply without restoring the missing tail. Python-2 `except X, e`
  # syntax also left untouched here.

  def __init__(self, job, rm_api=None, hs_api=None):
    super(SparkJob, self).__init__(job, rm_api)
    self._resolve_tracking_url()
    # Metrics are only available once the app has started running.
    if self.status not in ('NEW', 'SUBMITTED', 'ACCEPTED') and hs_api:
      self.history_server_api = hs_api
      self._get_metrics()

  @property
  def logs_url(self):
    # Returns the executor stdout link, or '' when none is available.
    log_links = self.history_server_api.get_executors_loglinks(self)
    return log_links['stdout'] if log_links and 'stdout' in log_links else ''

  @property
  def attempt_id(self):
    # Last path segment of the tracking URL.
    return self.trackingUrl.strip('/').split('/')[-1]

  def _resolve_tracking_url(self):
    # Follows the RM proxy redirect to find the app's real tracking URL.
    resp = None
    try:
      self._client = HttpClient(self.trackingUrl, logger=LOG)
      self._root = Resource(self._client)
      yarn_cluster = cluster.get_cluster_conf_for_job_submission()
      self._security_enabled = yarn_cluster.SECURITY_ENABLED.get()
      if self._security_enabled:
        self._client.set_kerberos_auth()

      self._client.set_verify(yarn_cluster.SSL_CERT_CA_VERIFY.get())
      actual_url = self._execute(self._root.resolve_redirect_url)

      # Strip a trailing 'jobs' segment so the base UI URL is kept.
      if actual_url.strip('/').split('/')[-1] == 'jobs':
        actual_url = actual_url.strip('/').replace('jobs', '')
      self.trackingUrl = actual_url
      LOG.debug("SparkJob tracking URL: %s" % self.trackingUrl)
    except Exception, e:
      LOG.warn("Failed to resolve Spark Job's actual tracking URL: %s" % e)
    finally:
class SparkJob(Application):
  # NOTE(review): truncated block — `_resolve_tracking_url`'s `finally:` has
  # no body in the source. Kept byte-identical apart from comments; the
  # Python-2 `except X, e` syntax is also preserved as-is.

  def __init__(self, job, rm_api=None, hs_api=None):
    super(SparkJob, self).__init__(job, rm_api)
    self._resolve_tracking_url()
    # History-server metrics only make sense once the app is past ACCEPTED.
    if self.status not in ('NEW', 'SUBMITTED', 'ACCEPTED') and hs_api:
      self.history_server_api = hs_api
      self._get_metrics()

  @property
  def logs_url(self):
    # Executor stdout link, or '' when unavailable.
    log_links = self.history_server_api.get_executors_loglinks(self)
    return log_links['stdout'] if log_links and 'stdout' in log_links else ''

  @property
  def attempt_id(self):
    # Last path segment of the tracking URL.
    return self.trackingUrl.strip('/').split('/')[-1]

  def _resolve_tracking_url(self):
    # Resolve the RM proxy redirect to the app's real tracking URL.
    resp = None
    try:
      self._client = HttpClient(self.trackingUrl, logger=LOG)
      self._root = Resource(self._client)
      yarn_cluster = cluster.get_cluster_conf_for_job_submission()
      self._security_enabled = yarn_cluster.SECURITY_ENABLED.get()
      if self._security_enabled:
        self._client.set_kerberos_auth()

      self._client.set_verify(yarn_cluster.SSL_CERT_CA_VERIFY.get())
      actual_url = self._execute(self._root.resolve_redirect_url)

      # Drop a trailing 'jobs' segment to keep the base UI URL.
      if actual_url.strip('/').split('/')[-1] == 'jobs':
        actual_url = actual_url.strip('/').replace('jobs', '')
      self.trackingUrl = actual_url
      LOG.debug("SparkJob tracking URL: %s" % self.trackingUrl)
    except Exception, e:
      LOG.warn("Failed to resolve Spark Job's actual tracking URL: %s" % e)
    finally:
class MapreduceApi(object):
  """Client for the MapReduce AM REST API, reached through the RM proxy."""

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(oozie_url, 'proxy')
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    if security_enabled:
      self._client.set_kerberos_auth()
    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "MapreduceApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  def _get(self, template, values):
    # All endpoints share the `<app>/ws/<version>/mapreduce/...` URL shape.
    values = dict(values, version=_API_VERSION)
    return self._root.get(template % values, headers={'Accept': _JSON_CONTENT_TYPE})

  def job(self, user, job_id):
    # `user` is unused but kept for interface compatibility.
    app_id = job_id.replace('job', 'application')
    return self._get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s',
                     {'app_id': app_id, 'job_id': job_id})

  def counters(self, job_id):
    app_id = job_id.replace('job', 'application')
    response = self._get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/counters',
                         {'app_id': app_id, 'job_id': job_id})
    # If it hits the job history server, it will return HTML.
    # Simply return None in this case because there isn't much data there.
    if isinstance(response, basestring):
      return None
    return response

  def tasks(self, job_id):
    app_id = job_id.replace('job', 'application')
    return self._get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks',
                     {'app_id': app_id, 'job_id': job_id})

  def job_attempts(self, job_id):
    app_id = job_id.replace('job', 'application')
    return self._get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/jobattempts',
                     {'app_id': app_id, 'job_id': job_id})

  def conf(self, job_id):
    app_id = job_id.replace('job', 'application')
    return self._get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/conf',
                     {'app_id': app_id, 'job_id': job_id})

  def task(self, job_id, task_id):
    app_id = job_id.replace('job', 'application')
    return self._get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s',
                     {'app_id': app_id, 'job_id': job_id, 'task_id': task_id})

  def task_counters(self, job_id, task_id):
    app_id = job_id.replace('job', 'application')
    job_id = job_id.replace('application', 'job')  # accept either id style
    return self._get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/counters',
                     {'app_id': app_id, 'job_id': job_id, 'task_id': task_id})

  def task_attempts(self, job_id, task_id):
    app_id = job_id.replace('job', 'application')
    return self._get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts',
                     {'app_id': app_id, 'job_id': job_id, 'task_id': task_id})

  def task_attempt(self, job_id, task_id, attempt_id):
    app_id = job_id.replace('job', 'application')
    job_id = job_id.replace('application', 'job')  # accept either id style
    return self._get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s',
                     {'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id})

  def kill(self, job_id):
    app_id = job_id.replace('job', 'application')
    get_resource_manager().kill(app_id)  # We need to call the RM
class MapreduceApi(object):
  """MapReduce AM REST client (via RM proxy) with per-thread user impersonation."""

  def __init__(self, mr_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(mr_url, 'proxy')
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    self._thread_local = threading.local()  # To store user info
    if security_enabled:
      self._client.set_kerberos_auth()
    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "MapreduceApi at %s" % (self._url,)

  def _get_params(self):
    params = {}
    if self.username != DEFAULT_USER.get():
      # We impersonate if needed
      params['doAs'] = self.username
      if not self._security_enabled:
        params['user.name'] = DEFAULT_USER.get()
    return params

  @property
  def url(self):
    return self._url

  @property
  def username(self):
    # Falls back to the default user when no per-thread user was set.
    try:
      return self._thread_local.user
    except AttributeError:
      return DEFAULT_USER.get()

  def setuser(self, user):
    # Returns the previous user so callers can restore it afterwards.
    previous = self.username
    self._thread_local.user = user
    return previous

  def _get(self, template, values):
    # Shared fetch: impersonation params + JSON Accept header on every call.
    values = dict(values, version=_API_VERSION)
    return self._root.get(template % values, params=self._get_params(),
                          headers={'Accept': _JSON_CONTENT_TYPE})

  def job(self, user, job_id):
    # `user` is unused but kept for interface compatibility.
    app_id = job_id.replace('job', 'application')
    return self._get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s',
                     {'app_id': app_id, 'job_id': job_id})

  def counters(self, job_id):
    app_id = job_id.replace('job', 'application')
    response = self._get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/counters',
                         {'app_id': app_id, 'job_id': job_id})
    # If it hits the job history server, it will return HTML.
    # Simply return None in this case because there isn't much data there.
    if isinstance(response, basestring):
      return None
    return response

  def tasks(self, job_id):
    app_id = job_id.replace('job', 'application')
    return self._get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks',
                     {'app_id': app_id, 'job_id': job_id})

  def job_attempts(self, job_id):
    app_id = job_id.replace('job', 'application')
    return self._get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/jobattempts',
                     {'app_id': app_id, 'job_id': job_id})

  def conf(self, job_id):
    app_id = job_id.replace('job', 'application')
    return self._get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/conf',
                     {'app_id': app_id, 'job_id': job_id})

  def task(self, job_id, task_id):
    app_id = job_id.replace('job', 'application')
    return self._get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s',
                     {'app_id': app_id, 'job_id': job_id, 'task_id': task_id})

  def task_counters(self, job_id, task_id):
    app_id = job_id.replace('job', 'application')
    job_id = job_id.replace('application', 'job')  # accept either id style
    return self._get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/counters',
                     {'app_id': app_id, 'job_id': job_id, 'task_id': task_id})

  def task_attempts(self, job_id, task_id):
    app_id = job_id.replace('job', 'application')
    return self._get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts',
                     {'app_id': app_id, 'job_id': job_id, 'task_id': task_id})

  def task_attempt(self, job_id, task_id, attempt_id):
    app_id = job_id.replace('job', 'application')
    job_id = job_id.replace('application', 'job')  # accept either id style
    return self._get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s',
                     {'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id})

  def task_attempt_counters(self, job_id, task_id, attempt_id):
    app_id = job_id.replace('job', 'application')
    job_id = job_id.replace('application', 'job')  # accept either id style
    return self._get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s/counters',
                     {'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id})

  def kill(self, job_id):
    app_id = job_id.replace('job', 'application')
    get_resource_manager(self.username).kill(app_id)  # We need to call the RM
class ManagerApi(object):
  """
  Client for the Cloudera Manager REST API.

  https://cloudera.github.io/cm_api/

  Authenticates with Kerberos when `security_enabled`, otherwise with the
  navigator basic-auth credentials. Most helpers navigate the
  `clusters/<name>/services/.../roles/.../config` hierarchy; RestExceptions
  are re-raised as ManagerApiException.
  """

  def __init__(self, user=None, security_enabled=False, ssl_cert_ca_verify=False):
    # Base URL is MANAGER.API_URL plus the API VERSION segment.
    self._api_url = '%s/%s' % (MANAGER.API_URL.get().strip('/'), VERSION)
    self._username = get_navigator_auth_username()
    self._password = get_navigator_auth_password()
    self.user = user
    self._client = HttpClient(self._api_url, logger=LOG)
    if security_enabled:
      self._client.set_kerberos_auth()
    else:
      self._client.set_basic_auth(self._username, self._password)
    self._client.set_verify(ssl_cert_ca_verify)
    self._root = Resource(self._client)

  def has_service(self, service_name, cluster_name=None):
    """Return True if `service_name` is among the cluster's service types."""
    cluster = self._get_cluster(cluster_name)
    try:
      # NOTE: 'service_name' in the mapping is unused by this format string;
      # kept as in the original.
      services = self._root.get(
          'clusters/%(cluster_name)s/serviceTypes' % {
              'cluster_name': cluster['name'],
              'service_name': service_name
          })['items']
      return service_name in services
    except RestException as e:
      raise ManagerApiException(e)

  def get_spark_history_server_configs(self, cluster_name=None):
    """
    Locate the Spark History Server role and return
    (host_id, full_view_config_items); (None, None) when not found or on error.
    """
    service_name = "SPARK_ON_YARN"
    shs_role_type = "SPARK_YARN_HISTORY_SERVER"
    try:
      cluster = self._get_cluster(cluster_name)
      services = self._root.get(
          'clusters/%(cluster_name)s/services' % {
              'cluster_name': cluster['name'],
              'service_name': service_name
          })['items']
      service_display_names = [service['displayName'] for service in services if service['type'] == service_name]
      if service_display_names:
        spark_service_display_name = service_display_names[0]
        servers = self._root.get(
            'clusters/%(cluster_name)s/services/%(spark_service_display_name)s/roles' % {
                'cluster_name': cluster['name'],
                'spark_service_display_name': spark_service_display_name
            })['items']
        shs_server_names = [server['name'] for server in servers if server['type'] == shs_role_type]
        shs_server_name = shs_server_names[0] if shs_server_names else None
        shs_server_hostRef = [server['hostRef'] for server in servers if server['type'] == shs_role_type]
        shs_server_hostId = shs_server_hostRef[0]['hostId'] if shs_server_hostRef else None
        if shs_server_name and shs_server_hostId:
          # 'view=full' includes configs still at their default value.
          shs_server_configs = self._root.get(
              'clusters/%(cluster_name)s/services/%(spark_service_display_name)s/roles/%(shs_server_name)s/config' % {
                  'cluster_name': cluster['name'],
                  'spark_service_display_name': spark_service_display_name,
                  'shs_server_name': shs_server_name
              },
              params={'view': 'full'})['items']
          return shs_server_hostId, shs_server_configs
    except Exception as e:
      LOG.warning("Check Spark History Server via ManagerApi: %s" % e)
    return None, None

  def get_spark_history_server_url(self, cluster_name=None):
    """Assemble the Spark History Server UI URL from CM configs, or None."""
    shs_server_hostId, shs_server_configs = self.get_spark_history_server_configs(cluster_name=cluster_name)
    if shs_server_hostId and shs_server_configs:
      shs_ui_port = None
      shs_ssl_port = None
      shs_ssl_enabled = None
      for config in shs_server_configs:
        if 'relatedName' in config and 'default' in config:
          if config['relatedName'] == 'spark.history.ui.port':
            shs_ui_port = config['default']
          if config['relatedName'] == 'spark.ssl.historyServer.port':
            shs_ssl_port = config['default']
          if config['relatedName'] == 'spark.ssl.historyServer.enabled':
            shs_ssl_enabled = config['default']
      shs_ui_host = self._root.get('hosts/%(hostId)s' % {'hostId': shs_server_hostId})
      shs_ui_hostname = shs_ui_host['hostname'] if shs_ui_host else None
      return self.assemble_shs_url(shs_ui_hostname, shs_ui_port, shs_ssl_port, shs_ssl_enabled)
    return None

  def get_spark_history_server_security_enabled(self, cluster_name=None):
    """Return True when the SHS has SPNEGO enabled ('true' default value)."""
    shs_server_hostId, shs_server_configs = self.get_spark_history_server_configs(cluster_name=cluster_name)
    if shs_server_configs:
      for config in shs_server_configs:
        if 'relatedName' in config and 'default' in config and config['relatedName'] == 'history_server_spnego_enabled':
          shs_security_enabled = config['default']
          # Returns on the first matching config entry.
          return shs_security_enabled and shs_security_enabled == 'true'
    return False

  def assemble_shs_url(self, shs_ui_hostname, shs_ui_port=None, shs_ssl_port=None, shs_ssl_enabled=None):
    """
    Build 'http(s)://host:port' for the Spark History Server.

    NOTE(review): requires ALL of hostname, ui port, ssl port and the
    ssl-enabled flag to be present, even when SSL is off — confirm that is
    intended before relying on it.
    """
    if not shs_ui_hostname or not shs_ui_port or not shs_ssl_port or not shs_ssl_enabled:
      LOG.warning("Spark conf not found!")
      return None

    protocol = 'https' if shs_ssl_enabled.lower() == 'true' else 'http'
    shs_url = '%(protocol)s://%(hostname)s:%(port)s' % {
        'protocol': protocol,
        'hostname': shs_ui_hostname,
        'port': shs_ssl_port if shs_ssl_enabled.lower() == 'true' else shs_ui_port,
    }
    return shs_url

  def tools_echo(self):
    """Ping the CM API via the tools/echo endpoint (connectivity check)."""
    try:
      params = (('message', 'hello'),)
      LOG.info(params)
      return self._root.get('tools/echo', params=params)
    except RestException as e:
      raise ManagerApiException(e)

  def get_kafka_brokers(self, cluster_name=None):
    """Return a comma-separated 'host:9092' list of Kafka brokers."""
    try:
      hosts = self._get_hosts('KAFKA', 'KAFKA_BROKER', cluster_name=cluster_name)
      brokers_hosts = [host['hostname'] + ':9092' for host in hosts]
      return ','.join(brokers_hosts)
    except RestException as e:
      raise ManagerApiException(e)

  def get_kudu_master(self, cluster_name=None):
    """Return the hostname of the first KUDU_MASTER role."""
    try:
      cluster = self._get_cluster(cluster_name)
      services = self._root.get('clusters/%(name)s/services' % cluster)['items']
      service = [service for service in services if service['type'] == 'KUDU'][0]
      master = self._get_roles(cluster['name'], service['name'], 'KUDU_MASTER')[0]
      master_host = self._root.get('hosts/%(hostId)s' % master['hostRef'])
      return master_host['hostname']
    except RestException as e:
      raise ManagerApiException(e)

  def get_kafka_topics(self, broker_host):
    """List topics from the broker's HTTP endpoint on port 24042."""
    try:
      client = HttpClient('http://%s:24042' % broker_host, logger=LOG)
      root = Resource(client)
      return root.get('/api/topics')
    except RestException as e:
      raise ManagerApiException(e)

  def update_flume_config(self, cluster_name, config_name, config_value):
    """Set one config on the FLUME-1 AGENT role config group via a batch PUT."""
    service = 'FLUME-1'
    cluster = self._get_cluster(cluster_name)
    roleConfigGroup = [role['roleConfigGroupRef']['roleConfigGroupName'] for role in self._get_roles(cluster['name'], service, 'AGENT')]
    data = {
      u'items': [{
        # %20-escapes are literal URL text here: the placeholders are filled
        # with .replace(), not %-formatting, so they survive intact.
        u'url': u'/api/v8/clusters/%(cluster_name)s/services/%(service)s/roleConfigGroups/%(roleConfigGroups)s/config?message=Updated%20service%20and%20role%20type%20configurations.'.replace(
            '%(cluster_name)s', urllib_quote(cluster['name'])).replace(
            '%(service)s', service).replace(
            '%(roleConfigGroups)s', roleConfigGroup[0]),
        u'body': {
          u'items': [{
            u'name': config_name,
            u'value': config_value
          }]
        },
        u'contentType': u'application/json',
        u'method': u'PUT'
      }]
    }
    return self.batch(items=data)

  def get_flume_agents(self, cluster_name=None):
    """Return the hostnames running a Flume AGENT role."""
    return [host['hostname'] for host in self._get_hosts('FLUME', 'AGENT', cluster_name=cluster_name)]

  def _get_hosts(self, service_name, role_name, cluster_name=None):
    """Return full host records for hosts carrying `role_name` of `service_name`."""
    try:
      cluster = self._get_cluster(cluster_name)
      services = self._root.get('clusters/%(name)s/services' % cluster)['items']
      service = [service for service in services if service['type'] == service_name][0]
      hosts = self._get_roles(cluster['name'], service['name'], role_name)
      hosts_ids = [host['hostRef']['hostId'] for host in hosts]
      hosts = self._root.get('hosts')['items']
      return [host for host in hosts if host['hostId'] in hosts_ids]
    except RestException as e:
      raise ManagerApiException(e)

  def refresh_flume(self, cluster_name, restart=False):
    """Refresh (or restart, when `restart`) the FLUME-1 AGENT roles."""
    service = 'FLUME-1'
    cluster = self._get_cluster(cluster_name)
    roles = [role['name'] for role in self._get_roles(cluster['name'], service, 'AGENT')]
    if restart:
      return self.restart_services(cluster['name'], service, roles)
    else:
      return self.refresh_configs(cluster['name'], service, roles)

  def refresh_configs(self, cluster_name, service=None, roles=None):
    """Issue a refresh command at cluster, service or role granularity."""
    try:
      if service is None:
        return self._root.post('clusters/%(cluster_name)s/commands/refresh' % {'cluster_name': cluster_name}, contenttype="application/json")
      elif roles is None:
        return self._root.post(
            'clusters/%(cluster_name)s/services/%(service)s/roleCommands/refresh' % {
                'cluster_name': cluster_name,
                'service': service
            },
            contenttype="application/json")
      else:
        return self._root.post(
            'clusters/%(cluster_name)s/services/%(service)s/roleCommands/refresh' % {
                'cluster_name': cluster_name,
                'service': service
            },
            data=json.dumps({"items": roles}),
            contenttype="application/json")
    except RestException as e:
      raise ManagerApiException(e)

  def restart_services(self, cluster_name, service=None, roles=None):
    """Issue a restart command at cluster, service or role granularity."""
    try:
      if service is None:
        return self._root.post('clusters/%(cluster_name)s/commands/restart' % {'cluster_name': cluster_name}, contenttype="application/json")
      elif roles is None:
        return self._root.post(
            'clusters/%(cluster_name)s/services/%(service)s/roleCommands/restart' % {
                'cluster_name': cluster_name,
                'service': service
            },
            contenttype="application/json")
      else:
        return self._root.post(
            'clusters/%(cluster_name)s/services/%(service)s/roleCommands/restart' % {
                'cluster_name': cluster_name,
                'service': service
            },
            data=json.dumps({"items": roles}),
            contenttype="application/json")
    except RestException as e:
      raise ManagerApiException(e)

  def batch(self, items):
    """POST a CM batch request (list of url/method/body items)."""
    try:
      return self._root.post('batch', data=json.dumps(items), contenttype='application/json')
    except RestException as e:
      raise ManagerApiException(e)

  def _get_cluster(self, cluster_name=None):
    """Return the named cluster record, or the first cluster when unnamed."""
    clusters = self._root.get('clusters/')['items']
    if cluster_name is not None:
      cluster = [cluster for cluster in clusters if cluster['name'] == cluster_name][0]
    else:
      cluster = clusters[0]
    return cluster

  def _get_roles(self, cluster_name, service_name, role_type):
    """Return the service's roles filtered down to `role_type`."""
    roles = self._root.get(
        'clusters/%(cluster_name)s/services/%(service_name)s/roles' % {
            'cluster_name': cluster_name,
            'service_name': service_name
        })['items']
    return [role for role in roles if role['type'] == role_type]

  def get_impalad_config(self, key=None, impalad_host=None, cluster_name=None):
    """
    Return the value of config `key` for the IMPALAD role on `impalad_host`,
    or None when missing / on any error.
    """
    if not key or not impalad_host:
      return None

    service_name = "IMPALA"
    role_type = 'IMPALAD'
    try:
      cluster = self._get_cluster(cluster_name)
      services = self._root.get(
          'clusters/%(cluster_name)s/services' % {
              'cluster_name': cluster['name'],
              'service_name': service_name
          })['items']
      service_display_names = [service['displayName'] for service in services if service['type'] == service_name]
      hosts = self._root.get('hosts')['items']
      impalad_hostIds = [host['hostId'] for host in hosts if host['hostname'] == impalad_host]
      if impalad_hostIds and service_display_names:
        impalad_hostId = impalad_hostIds[0]
        impala_service_display_name = service_display_names[0]
        # The 'spark_service_display_name' key name is a copy-paste from the
        # SHS helper above; it carries the Impala display name here.
        servers = self._root.get(
            'clusters/%(cluster_name)s/services/%(spark_service_display_name)s/roles' % {
                'cluster_name': cluster['name'],
                'spark_service_display_name': impala_service_display_name
            })['items']
        impalad_server_names = [server['name'] for server in servers if server['type'] == role_type and server['hostRef']['hostId'] == impalad_hostId]
        impalad_server_name = impalad_server_names[0] if impalad_server_names else None
        if impalad_server_name:
          server_configs = self._root.get(
              'clusters/%(cluster_name)s/services/%(spark_service_display_name)s/roles/%(shs_server_name)s/config' % {
                  'cluster_name': cluster['name'],
                  'spark_service_display_name': impala_service_display_name,
                  'shs_server_name': impalad_server_name
              },
              params={'view': 'full'})['items']
          for config in server_configs:
            if 'relatedName' in config and 'value' in config:
              if config['relatedName'] == key:
                return config['value']
    except Exception as e:
      LOG.warning("Get Impala Daemon API configurations via ManangerAPI: %s" % e)
    return None
class SparkHistoryServerApi(object):
  """
  Client for the Spark History Server REST API (`api/<version>/`).

  `spark_hs_url` is kept as the UI URL; REST calls go through `self._root`
  with a JSON Accept header. Kerberos auth is enabled when requested.
  """

  def __init__(self, spark_hs_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._ui_url = spark_hs_url
    self._url = posixpath.join(spark_hs_url, 'api/%s/' % _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    if self._security_enabled:
      self._client.set_kerberos_auth()
    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "Spark History Server API at %s" % (self._url,)

  @property
  def url(self):
    # REST base URL (includes the api/<version>/ suffix).
    return self._url

  @property
  def ui_url(self):
    # Plain UI URL, without the REST suffix.
    return self._ui_url

  @property
  def headers(self):
    return {'Accept': _JSON_CONTENT_TYPE}

  def applications(self):
    """List all applications known to the history server."""
    return self._root.get('applications', headers=self.headers)

  def application(self, app_id):
    """Fetch one application by id."""
    return self._root.get('applications/%(app_id)s' % {'app_id': app_id}, headers=self.headers)

  def jobs(self, app_id):
    """List the jobs of application `app_id` (may include '/attempt')."""
    return self._root.get('applications/%(app_id)s/jobs' % {'app_id': app_id}, headers=self.headers)

  def stages(self, app_id):
    """List the stages of application `app_id`."""
    return self._root.get('applications/%(app_id)s/stages' % {'app_id': app_id}, headers=self.headers)

  def executors(self, job):
    """List executors for `job`; [] when the real app id cannot be resolved."""
    LOG.debug("Getting executors for Spark job %s" % job.jobId)
    app_id = self.get_real_app_id(job)
    if not app_id:
      return []

    return self._root.get('applications/%(app_id)s/executors' % {'app_id': app_id}, headers=self.headers)

  def stage_attempts(self, app_id, stage_id):
    """List all attempts of one stage."""
    return self._root.get('applications/%(app_id)s/stages/%(stage_id)s' % {
      'app_id': app_id,
      'stage_id': stage_id
    }, headers=self.headers)

  def stage_attempt(self, app_id, stage_id, stage_attempt_id):
    """Fetch one stage attempt."""
    return self._root.get('applications/%(app_id)s/stages/%(stage_id)s/%(stage_attempt_id)s' % {
      'app_id': app_id,
      'stage_id': stage_id,
      'stage_attempt_id': stage_attempt_id
    }, headers=self.headers)

  def task_summary(self, app_id, stage_id, stage_attempt_id):
    """Fetch the task metric summary of one stage attempt."""
    return self._root.get('applications/%(app_id)s/stages/%(stage_id)s/%(stage_attempt_id)s/taskSummary' % {
      'app_id': app_id,
      'stage_id': stage_id,
      'stage_attempt_id': stage_attempt_id
    }, headers=self.headers)

  def task_list(self, app_id, stage_id, stage_attempt_id):
    """List the tasks of one stage attempt."""
    return self._root.get('applications/%(app_id)s/stages/%(stage_id)s/%(stage_attempt_id)s/taskList' % {
      'app_id': app_id,
      'stage_id': stage_id,
      'stage_attempt_id': stage_attempt_id
    }, headers=self.headers)

  def storages(self, app_id):
    """List the application's persisted RDDs."""
    return self._root.get('applications/%(app_id)s/storage/rdd' % {'app_id': app_id}, headers=self.headers)

  def storage(self, app_id, rdd_id):
    """Fetch one persisted RDD."""
    return self._root.get('applications/%(app_id)s/storage/rdd/%(rdd_id)s' % {
      'app_id': app_id,
      'rdd_id': rdd_id
    }, headers=self.headers)

  def download_logs(self, app_id):
    """Download the event logs of all attempts of `app_id`."""
    return self._root.get('applications/%(app_id)s/logs' % {'app_id': app_id}, headers=self.headers)

  def download_attempt_logs(self, app_id, attempt_id):
    """Download the event logs of one attempt."""
    return self._root.get('applications/%(app_id)s/%(attempt_id)s/logs' % {
      'app_id': app_id,
      'attempt_id': attempt_id
    }, headers=self.headers)

  def download_executors_logs(self, request, job, name, offset):
    """Fetch `name` (stdout/stderr) from the driver executor's log links."""
    log_links = self.get_executors_loglinks(job)

    return self.retrieve_log_content(log_links, name, request.user.username, offset)

  def download_executor_logs(self, user, executor, name, offset):
    """Fetch `name` (stdout/stderr) from a given executor record."""
    return self.retrieve_log_content(executor['logs'], name, user.username, offset)

  def retrieve_log_content(self, log_links, log_name, username, offset):
    """
    Fetch a NodeManager log page and strip it down to the log text.

    Anything other than 'stderr' is coerced to 'stdout'. `offset` becomes the
    'start' query parameter when non-zero. The page is scraped with lxml: the
    log body sits in the second table cell of the NM log page.
    """
    params = {
      'doAs': username
    }

    if offset != 0:
      params['start'] = offset

    if not log_name or not log_name == 'stderr':
      log_name = 'stdout'

    log = ''
    if log_links and log_name in log_links:
      log_link = log_links[log_name]

      root = Resource(get_log_client(log_link), lib_urlsplit(log_link)[2], urlencode=False)
      response = root.get('', params=params)
      log = html.fromstring(response, parser=html.HTMLParser()).xpath('/html/body/table/tbody/tr/td[2]')[0].text_content()
    return log

  def get_executors_loglinks(self, job):
    """
    Return the log-links entry of the driver executor (or the first executor
    when no driver is listed), or None.

    NOTE(review): executor metric rows are positional — index 0 looks like the
    executor id and index 12 the log links; confirm against the producer of
    job.metrics before changing.
    """
    executor = None
    if job.metrics and 'executors' in job.metrics and job.metrics['executors']:
      executors = [executor for executor in job.metrics['executors'] if executor[0] == 'driver']  # look up driver executor
      if not executors:
        executor = job.metrics['executors'][0]
      else:
        executor = executors[0]

    return None if not executor else executor[12]

  def get_real_app_id(self, job):
    """
    Resolve the history-server application id (possibly '<app-id>/<attempt-id>')
    for `job`.

    Returns {} when the job is unknown to the history server and None on error
    (NOTE(review): callers only truth-test the result, but the two falsy
    returns are inconsistent — worth unifying).
    """
    # https://spark.apache.org/docs/1.6.0/monitoring.html and https://spark.apache.org/docs/2.0.0/monitoring.html
    # When running on Yarn, each application has multiple attempts, so [app-id] is actually [app-id]/[attempt-id] in all cases.
    # When running job as cluster mode, an attempt number is part of application ID, but proxy URL can't be resolved to match
    # Spark history URL. In the applications list, each job's attempt list shows if attempt ID is used and how many attempts.
    try:
      jobs_json = self.applications()
      job_filtered_json = [x for x in jobs_json if x['id'] == job.jobId]

      if not job_filtered_json:
        return {}

      attempts = job_filtered_json[0]['attempts']

      if len(attempts) == 1:
        app_id = job.jobId if 'attemptId' not in attempts[0] else job.jobId + '/' + attempts[0]['attemptId']
      else:
        app_id = job.jobId + '/%d' % len(attempts)

      LOG.debug("Getting real spark app id %s for Spark job %s" % (app_id, job.jobId))
    except Exception as e:
      LOG.error('Cannot get real app id %s: %s' % (job.jobId, e))
      app_id = None

    return app_id
class JobServerApi(object): def __init__(self, livy_url): self._url = posixpath.join(livy_url) self._client = HttpClient(self._url, logger=LOG) self._root = Resource(self._client) self._security_enabled = SECURITY_ENABLED.get() self._csrf_enabled = CSRF_ENABLED.get() self._thread_local = threading.local() if self.security_enabled: self._client.set_kerberos_auth() if self.csrf_enabled: self._client.set_headers({'X-Requested-By' : 'hue'}) self._client.set_verify(SSL_CERT_CA_VERIFY.get()) def __str__(self): return "JobServerApi at %s" % (self._url,) @property def url(self): return self._url @property def security_enabled(self): return self._security_enabled @property def csrf_enabled(self): return self._csrf_enabled @property def user(self): return self._thread_local.user def setuser(self, user): if hasattr(user, 'username'): self._thread_local.user = user.username else: self._thread_local.user = user def get_status(self): return self._root.get('sessions') def get_log(self, uuid, startFrom=None, size=None): params = {} if startFrom is not None: params['from'] = startFrom if size is not None: params['size'] = size response = self._root.get('sessions/%s/log' % uuid, params=params) return '\n'.join(response['log']) def create_session(self, **properties): properties['proxyUser'] = self.user return self._root.post('sessions', data=json.dumps(properties), contenttype=_JSON_CONTENT_TYPE) def get_sessions(self): return self._root.get('sessions') def get_session(self, uuid): return self._root.get('sessions/%s' % uuid) def get_statements(self, uuid): return self._root.get('sessions/%s/statements' % uuid) def submit_statement(self, uuid, statement): data = {'code': statement} return self._root.post('sessions/%s/statements' % uuid, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE) def inspect(self, uuid, statement): data = {'code': statement} return self._root.post('sessions/%s/inspect' % uuid, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE) def fetch_data(self, 
session, statement): return self._root.get('sessions/%s/statements/%s' % (session, statement)) def cancel(self, session): return self._root.post('sessions/%s/interrupt' % session) def close(self, uuid): return self._root.delete('sessions/%s' % uuid) def get_batches(self): return self._root.get('batches') def submit_batch(self, properties): properties['proxyUser'] = self.user return self._root.post('batches', data=json.dumps(properties), contenttype=_JSON_CONTENT_TYPE) def get_batch(self, uuid): return self._root.get('batches/%s' % uuid) def get_batch_status(self, uuid): response = self._root.get('batches/%s/state' % uuid) return response['state'] def get_batch_log(self, uuid, startFrom=None, size=None): params = {} if startFrom is not None: params['from'] = startFrom if size is not None: params['size'] = size response = self._root.get('batches/%s/log' % uuid, params=params) return '\n'.join(response['log']) def close_batch(self, uuid): return self._root.delete('batches/%s' % uuid)
class ResourceManagerApi(object): def __init__(self, rm_url, security_enabled=False, ssl_cert_ca_verify=False): self._url = posixpath.join(rm_url, 'ws', _API_VERSION) self._client = HttpClient(self._url, logger=LOG) self._root = Resource(self._client) self._security_enabled = security_enabled self._thread_local = threading.local() # To store user info self.from_failover = False if self._security_enabled: self._client.set_kerberos_auth() self._client.set_verify(ssl_cert_ca_verify) def _get_params(self): params = {} if self.username != DEFAULT_USER.get(): # We impersonate if needed params['doAs'] = self.username if not self.security_enabled: params['user.name'] = DEFAULT_USER.get() return params def __str__(self): return "ResourceManagerApi at %s" % (self._url,) def setuser(self, user): curr = self.user self._thread_local.user = user return curr @property def user(self): return self.username # Backward compatibility @property def username(self): try: return self._thread_local.user except AttributeError: return DEFAULT_USER.get() @property def url(self): return self._url @property def security_enabled(self): return self._security_enabled def cluster(self, **kwargs): params = self._get_params() return self._execute(self._root.get, 'cluster/info', params=params, headers={'Accept': _JSON_CONTENT_TYPE}) def apps(self, **kwargs): params = self._get_params() params.update(kwargs) return self._execute(self._root.get, 'cluster/apps', params=params, headers={'Accept': _JSON_CONTENT_TYPE}) def app(self, app_id): params = self._get_params() return self._execute(self._root.get, 'cluster/apps/%(app_id)s' % {'app_id': app_id}, params=params, headers={'Accept': _JSON_CONTENT_TYPE}) def appattempts(self, app_id): params = self._get_params() return self._execute(self._root.get, 'cluster/apps/%(app_id)s/appattempts' % {'app_id': app_id}, params=params, headers={'Accept': _JSON_CONTENT_TYPE}) def appattempts_attempt(self, app_id, attempt_id): attempts = self.appattempts(app_id) for 
attempt in attempts['appAttempts']['appAttempt']: if attempt['id'] == attempt_id: return attempt raise PopupException('Application {} does not have application attempt with id {}'.format(app_id, attempt_id)) def kill(self, app_id): data = {'state': 'KILLED'} token = None # Tokens are managed within the kill method but should be moved out when not alpha anymore or we support submitting an app. if self.security_enabled and False: full_token = self.delegation_token() if 'token' not in full_token: raise PopupException(_('YARN did not return any token field.'), detail=smart_str(full_token)) data['X-Hadoop-Delegation-Token'] = token = full_token.pop('token') LOG.debug('Received delegation token %s' % full_token) try: params = self._get_params() return self._execute(self._root.put, 'cluster/apps/%(app_id)s/state' % {'app_id': app_id}, params=params, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE) finally: if token: self.cancel_token(token) def delegation_token(self): params = self._get_params() data = {'renewer': self.username} return self._execute(self._root.post, 'cluster/delegation-token', params=params, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE) def cancel_token(self, token): params = self._get_params() headers = {'Hadoop-YARN-RM-Delegation-Token': token} LOG.debug('Canceling delegation token of ' % self.username) return self._execute(self._root.delete, 'cluster/delegation-token', params=params, headers=headers) def _execute(self, function, *args, **kwargs): response = None try: response = function(*args, **kwargs) except Exception, e: raise PopupException(_('YARN RM returned a failed response: %s') % e) return response
class SolrApi(object):
  """
  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler

  Client for Solr search: builds `select` query parameter tuples (facets,
  filters, field lists, highlighting, sorting) from a dashboard collection
  definition, plus the suggester endpoint.
  """

  def __init__(
      self,
      solr_url,
      user,
      security_enabled=SECURITY_ENABLED.get() if search_enabled() else SECURITY_ENABLED.default,
      ssl_cert_ca_verify=SSL_CERT_CA_VERIFY.get(),
  ):
    self._url = solr_url
    self._user = user
    self._client = HttpClient(self._url, logger=LOG)
    self.security_enabled = security_enabled

    if self.security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

    self._root = resource.Resource(self._client)

    # The Kerberos handshake requires two requests in order to authenticate,
    # but if our first request is a PUT/POST, it might flat-out reject the
    # first request if the body is too large. So, connect here in order to get
    # a cookie so future PUT/POSTs will be pre-authenticated.
    if self.security_enabled:
      self._root.invoke("HEAD", "/")

  def query(self, collection, query):
    """
    Run a dashboard query against `collection` and return the parsed JSON.

    Builds the full `select` parameter tuple: pagination (capped at 1000
    rows / start 10000), classic facet params, JSON facets, fq filters,
    field list (with optional leaflet-map and nested-document fields),
    highlighting, and per-field sorting.
    """
    solr_query = {}
    solr_query["collection"] = collection["name"]

    # Pagination: downloads page widely; interactive queries use the template.
    if query.get("download"):
      solr_query["rows"] = 1000
      solr_query["start"] = 0
    else:
      solr_query["rows"] = int(collection["template"]["rows"] or 10)
      solr_query["start"] = int(query["start"])

    solr_query["rows"] = min(solr_query["rows"], 1000)
    solr_query["start"] = min(solr_query["start"], 10000)

    params = self._get_params() + (
        ("q", self._get_q(query)),
        ("wt", "json"),
        ("rows", solr_query["rows"]),
        ("start", solr_query["start"]),
    )

    if any(collection["facets"]):
      params += (("facet", "true"), ("facet.mincount", 0), ("facet.limit", 10))

      json_facets = {}
      timeFilter = self._get_range_borders(collection, query)

      for facet in collection["facets"]:
        if facet["type"] == "query":
          params += (("facet.query", "%s" % facet["field"]),)
        elif facet["type"] == "range" or facet["type"] == "range-up":
          keys = {
              "id": "%(id)s" % facet,
              "field": facet["field"],
              "key": "%(field)s-%(id)s" % facet,
              "start": facet["properties"]["start"],
              "end": facet["properties"]["end"],
              "gap": facet["properties"]["gap"],
              "mincount": int(facet["properties"]["mincount"]),
          }

          if (
              timeFilter
              and timeFilter["time_field"] == facet["field"]
              and (
                  facet["id"] not in timeFilter["time_filter_overrides"]
                  or facet["widgetType"] != "histogram-widget"
              )
          ):
            keys.update(self._get_time_filter_query(timeFilter, facet))

          params += (
              (
                  "facet.range",
                  "{!key=%(key)s ex=%(id)s f.%(field)s.facet.range.start=%(start)s f.%(field)s.facet.range.end=%(end)s f.%(field)s.facet.range.gap=%(gap)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s"
                  % keys,
              ),
          )
        elif facet["type"] == "field":
          keys = {
              "id": "%(id)s" % facet,
              "field": facet["field"],
              "key": "%(field)s-%(id)s" % facet,
              # facet-widget gets one extra row to detect "more results".
              "limit": int(facet["properties"].get("limit", 10)) + (1 if facet["widgetType"] == "facet-widget" else 0),
              "mincount": int(facet["properties"]["mincount"]),
          }

          params += (
              (
                  "facet.field",
                  "{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s"
                  % keys,
              ),
          )
        elif facet["type"] == "nested":
          _f = {
              "field": facet["field"],
              "limit": int(facet["properties"].get("limit", 10)) + (1 if facet["widgetType"] == "text-facet-widget" else 0),
              "mincount": int(facet["properties"]["mincount"]),
              "sort": {"count": facet["properties"]["sort"]},
          }
          # Fixed: removed a stray Py2 `print facet` debug statement here.

          if facet["properties"]["domain"].get("blockParent") or facet["properties"]["domain"].get(
              "blockChildren"
          ):
            _f["domain"] = {}
            if facet["properties"]["domain"].get("blockParent"):
              _f["domain"]["blockParent"] = " OR ".join(facet["properties"]["domain"]["blockParent"])
            if facet["properties"]["domain"].get("blockChildren"):
              _f["domain"]["blockChildren"] = " OR ".join(facet["properties"]["domain"]["blockChildren"])

          if "start" in facet["properties"] and not facet["properties"].get("type") == "field":
            _f.update(
                {
                    "type": "range",
                    "start": facet["properties"]["start"],
                    "end": facet["properties"]["end"],
                    "gap": facet["properties"]["gap"],
                }
            )
            if (
                timeFilter
                and timeFilter["time_field"] == facet["field"]
                and (
                    facet["id"] not in timeFilter["time_filter_overrides"]
                    or facet["widgetType"] != "bucket-widget"
                )
            ):
              _f.update(self._get_time_filter_query(timeFilter, facet))
          else:
            _f.update(
                {
                    "type": "terms",
                    "field": facet["field"],
                    "excludeTags": facet["id"],
                    "offset": 0,
                    "numBuckets": True,
                    "allBuckets": True,
                    "prefix": "",
                }
            )
            if facet["properties"]["canRange"] and not facet["properties"]["isDate"]:
              del _f["mincount"]  # Numeric fields do not support

          if facet["properties"]["facets"]:
            self._n_facet_dimension(facet, _f, facet["properties"]["facets"], 1)
            if facet["widgetType"] == "text-facet-widget":
              _fname = _f["facet"].keys()[0]
              _f["sort"] = {_fname: facet["properties"]["sort"]}
              # domain = '-d2:NaN' # Solr 6.4

          json_facets[facet["id"]] = _f
        elif facet["type"] == "function":
          json_facets[facet["id"]] = self._get_aggregate_function(facet)
          json_facets["processEmpty"] = True
        elif facet["type"] == "pivot":
          if facet["properties"]["facets"] or facet["widgetType"] == "map-widget":
            fields = facet["field"]
            fields_limits = []
            for f in facet["properties"]["facets"]:
              fields_limits.append("f.%s.facet.limit=%s" % (f["field"], f["limit"]))
              fields_limits.append("f.%s.facet.mincount=%s" % (f["field"], f["mincount"]))
              fields += "," + f["field"]
            keys = {
                "id": "%(id)s" % facet,
                "key": "%(field)s-%(id)s" % facet,
                "field": facet["field"],
                "fields": fields,
                "limit": int(facet["properties"].get("limit", 10)),
                "mincount": int(facet["properties"]["mincount"]),
                "fields_limits": " ".join(fields_limits),
            }

            params += (
                (
                    "facet.pivot",
                    "{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s %(fields_limits)s}%(fields)s"
                    % keys,
                ),
            )

      if json_facets:
        params += (("json.facet", json.dumps(json_facets)),)

    params += self._get_fq(collection, query)

    # Field list: selected grid columns (plus id / leaflet-map fields) or '*'.
    if collection["template"]["fieldsSelected"] and collection["template"]["isGridLayout"]:
      fields = set(
          collection["template"]["fieldsSelected"] + [collection["idField"]] if collection["idField"] else []
      )  # Add field if needed
      if collection["template"]["leafletmap"].get("latitudeField"):
        fields.add(collection["template"]["leafletmap"]["latitudeField"])
      if collection["template"]["leafletmap"].get("longitudeField"):
        fields.add(collection["template"]["leafletmap"]["longitudeField"])
      if collection["template"]["leafletmap"].get("labelField"):
        fields.add(collection["template"]["leafletmap"]["labelField"])
      fl = urllib.unquote(utf_quoter(",".join(list(fields))))
    else:
      fl = "*"

    nested_fields = self._get_nested_fields(collection)
    if nested_fields:
      fl += urllib.unquote(utf_quoter(',[child parentFilter="%s"]' % " OR ".join(nested_fields)))

    params += (("fl", fl),)

    params += (("hl", "true"), ("hl.fl", "*"), ("hl.snippets", 5), ("hl.fragsize", 1000))

    # Per-field sort directions from the template's field attributes.
    if collection["template"]["fieldsSelected"]:
      fields = []
      for field in collection["template"]["fieldsSelected"]:
        attribute_field = filter(
            lambda attribute: field == attribute["name"], collection["template"]["fieldsAttributes"]
        )
        if attribute_field:
          if attribute_field[0]["sort"]["direction"]:
            fields.append("%s %s" % (field, attribute_field[0]["sort"]["direction"]))
      if fields:
        params += (("sort", ",".join(fields)),)

    response = self._root.get("%(collection)s/select" % solr_query, params)

    return self._get_json(response)

  def _n_facet_dimension(self, widget, _f, facets, dim):
    """
    Recursively build the nested JSON-facet tree for dimension `dim` from the
    `facets` list, writing into `_f` in place. Count facets become 'terms'
    buckets; non-count facets become aggregate entries.
    """
    facet = facets[0]
    f_name = "dim_%02d:%s" % (dim, facet["field"])

    if facet["aggregate"]["function"] == "count":
      if "facet" not in _f:
        _f["facet"] = {f_name: {}}
      else:
        _f["facet"][f_name] = {}
      _f = _f["facet"]

      _f[f_name] = {
          "type": "terms",
          "field": "%(field)s" % facet,
          "limit": int(facet.get("limit", 10)),
          "mincount": int(facet["mincount"]),
          "numBuckets": True,
          "allBuckets": True,
          "prefix": "",
      }
      if widget["widgetType"] == "tree2-widget" and facets[-1]["aggregate"]["function"] != "count":
        _f["subcount"] = self._get_aggregate_function(facets[-1])

      if len(facets) > 1:  # Get n+1 dimension
        if facets[1]["aggregate"]["function"] == "count":
          self._n_facet_dimension(widget, _f[f_name], facets[1:], dim + 1)
        else:
          self._n_facet_dimension(widget, _f[f_name], facets[1:], dim)
    else:
      agg_function = self._get_aggregate_function(facet)
      _f["facet"] = {"agg_%02d_00:%s" % (dim, agg_function): agg_function}
      for i, _f_agg in enumerate(facets[1:], 1):
        if _f_agg["aggregate"]["function"] != "count":
          agg_function = self._get_aggregate_function(_f_agg)
          _f["facet"]["agg_%02d_%02d:%s" % (dim, i, agg_function)] = agg_function
        else:
          self._n_facet_dimension(widget, _f, facets[i:], dim + 1)  # Get n+1 dimension
          break

  def suggest(self, collection, query):
    """Query the suggester for `query['q']` (optionally a named dictionary)."""
    try:
      params = self._get_params() + (
          ("suggest", "true"),
          ("suggest.build", "true"),
          ("suggest.q", query["q"]),
          ("wt", "json"),
      )
      if query.get("dictionary"):
        params += (("suggest.dictionary", query["dictionary"]),)
      response = self._root.get("%s/suggest" % collection, params)
      return self._get_json(response)
    except RestException as e:  # `as` syntax for consistency with the rest of the file
      raise PopupException(e, title=_("Error while accessing Solr"))
class OozieApi(object):
  """
  Client for the Oozie REST web-services API.

  All requests go through a ``Resource`` rooted at ``<oozie_url>/<api_version>``.
  When ``security_enabled`` is True the HTTP client authenticates via Kerberos
  and every call impersonates ``user`` through the ``doAs`` parameter.
  """

  def __init__(self, oozie_url, user, security_enabled=False, api_version=API_VERSION, ssl_cert_ca_verify=True):
    """
    :param oozie_url: base URL of the Oozie server
    :param user: Django-style user object (with a ``username`` attribute) or a plain username string
    :param security_enabled: enable Kerberos authentication on the client
    :param api_version: Oozie API version path component (e.g. 'v1')
    :param ssl_cert_ca_verify: whether to verify the server's SSL certificate
    """
    self._url = posixpath.join(oozie_url, api_version)
    self._client = HttpClient(self._url, logger=LOG)
    if security_enabled:
      self._client.set_kerberos_auth()
    self._client.set_verify(ssl_cert_ca_verify)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    # To store username info: accept either a user object or a raw username.
    if hasattr(user, 'username'):
      self.user = user.username
    else:
      self.user = user
    self.api_version = api_version

  def __str__(self):
    return "OozieApi at %s" % (self._url, )

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def _get_params(self):
    # Base query parameters sent with every request: impersonation + timezone.
    # 'user.name' is only required when Kerberos authentication is off.
    if self.security_enabled:
      return {'doAs': self.user, 'timezone': TIME_ZONE.get()}
    return {
      'user.name': DEFAULT_USER,
      'doAs': self.user,
      'timezone': TIME_ZONE.get()
    }

  def _get_oozie_properties(self, properties=None):
    # Base job configuration properties; caller-supplied values win.
    defaults = {
      'user.name': self.user,
    }
    if properties is not None:
      defaults.update(properties)
    return defaults

  VALID_JOB_FILTERS = ('name', 'user', 'group', 'status', 'startcreatedtime', 'text')
  VALID_LOG_FILTERS = set(('recent', 'limit', 'loglevel', 'text'))

  def _job_filter_string(self, filters):
    """
    Validate (key, value) filter pairs against VALID_JOB_FILTERS and join them
    into Oozie's ``key=value;key=value`` filter syntax.

    Raises ValueError on an unknown filter key.
    """
    filter_list = []
    for key, val in filters:
      if key not in OozieApi.VALID_JOB_FILTERS:
        raise ValueError(
          '"%s" is not a valid filter for selecting jobs' % (key, ))
      filter_list.append('%s=%s' % (key, val))
    return ';'.join(filter_list)

  def get_jobs(self, jobtype, offset=None, cnt=None, filters=None):
    """
    Get a list of Oozie jobs.

    Note that offset is 1-based. ``filters`` is an iterable of (key, value)
    pairs where each key is one of VALID_JOB_FILTERS: name, user, group,
    status, startcreatedtime, text.
    """
    params = self._get_params()
    if offset is not None:
      params['offset'] = str(offset)
    if cnt is not None:
      params['len'] = str(cnt)
    if filters is None:
      filters = []
    params['jobtype'] = jobtype
    params['filter'] = self._job_filter_string(filters)

    # Send the request
    resp = self._root.get('jobs', params)
    if jobtype == 'wf':
      wf_list = WorkflowList(self, resp, filters=filters)
    elif jobtype == 'coord':
      wf_list = CoordinatorList(self, resp, filters=filters)
    else:
      wf_list = BundleList(self, resp, filters=filters)
    return wf_list

  def get_workflows(self, offset=None, cnt=None, filters=None):
    return self.get_jobs('wf', offset, cnt, filters)

  def get_coordinators(self, offset=None, cnt=None, filters=None):
    return self.get_jobs('coord', offset, cnt, filters)

  def get_bundles(self, offset=None, cnt=None, filters=None):
    return self.get_jobs('bundle', offset, cnt, filters)

  # TODO: make get_job accept any jobid
  def get_job(self, jobid):
    """
    get_job(jobid) -> Workflow
    """
    params = self._get_params()
    resp = self._root.get('job/%s' % (jobid, ), params)
    wf = Workflow(self, resp)
    return wf

  def get_coordinator(self, jobid, offset=None, cnt=None, filters=None):
    """
    get_coordinator(jobid) -> Coordinator, with its actions ordered descending.

    ``filters`` is an iterable of (key, value) pairs like in get_jobs().
    """
    params = self._get_params()
    if offset is not None:
      params['offset'] = str(offset)
    if cnt is not None:
      params['len'] = str(cnt)
    if filters is None:
      # Was {}: use a list of pairs for consistency with get_jobs() — the
      # validation loop unpacks (key, value) tuples, which a dict would break.
      filters = []
    params['order'] = 'desc'
    params['filter'] = self._job_filter_string(filters)

    resp = self._root.get('job/%s' % (jobid, ), params)
    return Coordinator(self, resp)

  def get_bundle(self, jobid):
    params = self._get_params()
    resp = self._root.get('job/%s' % (jobid, ), params)
    return Bundle(self, resp)

  def get_job_definition(self, jobid):
    """
    get_job_definition(jobid) -> Definition (xml string)
    """
    params = self._get_params()
    params['show'] = 'definition'
    return self._root.get('job/%s' % (jobid, ), params)

  def get_job_log(self, jobid, logfilter=None):
    """
    get_job_log(jobid) -> Log (xml string)

    ``logfilter`` is an iterable of (key, value) pairs where each key is one
    of VALID_LOG_FILTERS: recent, limit, loglevel, text.
    """
    params = self._get_params()
    params['show'] = 'log'

    filter_list = []
    if logfilter is None:
      logfilter = []
    for key, val in logfilter:
      if key not in OozieApi.VALID_LOG_FILTERS:
        raise ValueError('"%s" is not a valid filter for job logs' % (key, ))
      filter_list.append('%s=%s' % (key, val))
    params['logfilter'] = ';'.join(filter_list)
    return self._root.get('job/%s' % (jobid, ), params)

  def get_job_status(self, jobid):
    params = self._get_params()
    params['show'] = 'status'

    xml = self._root.get('job/%s' % (jobid, ), params)
    return xml

  def get_action(self, action_id):
    # The wrapper class is inferred from the action id format: coordinator
    # action ids contain 'C@', bundle action ids 'B@', otherwise workflow.
    if 'C@' in action_id:
      Klass = CoordinatorAction
    elif 'B@' in action_id:
      Klass = BundleAction
    else:
      Klass = WorkflowAction
    params = self._get_params()
    resp = self._root.get('job/%s' % (action_id, ), params)
    return Klass(resp)

  def job_control(self, jobid, action, properties=None, parameters=None):
    """
    job_control(jobid, action) -> None

    Raise RestException on error.
    """
    if action not in ('start', 'suspend', 'resume', 'kill', 'rerun',
                      'coord-rerun', 'bundle-rerun', 'change', 'ignore', 'update'):
      msg = 'Invalid oozie job action: %s' % (action, )
      LOG.error(msg)
      raise ValueError(msg)
    properties = self._get_oozie_properties(properties)
    params = self._get_params()
    params['action'] = action
    if parameters is not None:
      params.update(parameters)

    return self._root.put('job/%s' % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def submit_workflow(self, application_path, properties=None):
    """
    submit_workflow(application_path, properties=None) -> jobid

    Raise RestException on error.
    """
    defaults = {
      'oozie.wf.application.path': application_path,
      'user.name': self.user,
    }
    if properties is not None:
      defaults.update(properties)
    properties = defaults

    return self.submit_job(properties)

  # Is name actually submit_coord?
  def submit_job(self, properties=None):
    """
    submit_job(properties=None, id=None) -> jobid

    Raise RestException on error.
    """
    defaults = {
      'user.name': self.user,
    }
    if properties is not None:
      defaults.update(properties)
    properties = defaults

    params = self._get_params()
    resp = self._root.post('jobs', params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)
    return resp['id']

  def dryrun(self, properties=None):
    defaults = {
      'user.name': self.user,
    }
    if properties is not None:
      defaults.update(properties)
    properties = defaults

    params = self._get_params()
    params['action'] = 'dryrun'
    return self._root.post('jobs', params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def rerun(self, jobid, properties=None, params=None):
    properties = self._get_oozie_properties(properties)
    if params is None:
      params = self._get_params()
    else:
      # Merge the base request parameters (doAs, timezone, ...) into the
      # caller-supplied ones. The previous code called
      # self._get_params().update(params) and discarded the merged dict, so
      # the base parameters were silently dropped whenever params was given.
      merged = self._get_params()
      merged.update(params)
      params = merged
    params['action'] = 'rerun'

    return self._root.put('job/%s' % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def get_build_version(self):
    """
    get_build_version() -> Build version (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/build-version', params)
    return resp

  def get_instrumentation(self):
    params = self._get_params()
    resp = self._root.get('admin/instrumentation', params)
    return resp

  def get_metrics(self):
    params = self._get_params()
    resp = self._root.get('admin/metrics', params)
    return resp

  def get_configuration(self):
    """
    get_configuration() -> Oozie config (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/configuration', params)
    return resp

  def get_oozie_status(self):
    """
    get_oozie_status() -> Oozie status (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/status', params)
    return resp

  def get_oozie_slas(self, **kwargs):
    """
    filter=
      app_name=my-sla-app
      id=0000002-131206135002457-oozie-oozi-W
      nominal_start=2013-06-18T00:01Z
      nominal_end=2013-06-23T00:01Z
    """
    params = self._get_params()
    # .items() instead of .iteritems(): identical here and Python 3 compatible.
    params['filter'] = ';'.join(
      ['%s=%s' % (key, val) for key, val in kwargs.items()])
    resp = self._root.get('sla', params)
    return resp['slaSummaryList']
class OptimizerApi(object):
  """
  Client for the (Navigator) Optimizer service.

  NOTE(review): this class mixes a REST resource (``self._root``) with a CLI
  invocation path (``_exec`` shelling out to the 'ccs navopt' binary) —
  confirm which entry points callers actually use.
  """

  # Static descriptors for the upload types understood by Optimizer: the CSV
  # column headers expected in the uploaded file, and the JSON "file headers"
  # descriptor template (completed via %-interpolation) sent with the upload.
  UPLOAD = {
    'queries': {
      'headers': ['SQL_ID', 'ELAPSED_TIME', 'SQL_FULLTEXT'],
      'file_headers': """{
  "fileLocation": "%(query_file)s",
  "tenant": "%(tenant)s",
  "fileName": "%(query_file_name)s",
  "sourcePlatform": "%(source_platform)s",
  "colDelim": ",",
  "rowDelim": "\\n",
  "headerFields": [
    {
      "count": 0,
      "coltype": "SQL_ID",
      "use": true,
      "tag": "",
      "name": "SQL_ID"
    },
    {
      "count": 0,
      "coltype": "NONE",
      "use": true,
      "tag": "",
      "name": "ELAPSED_TIME"
    },
    {
      "count": 0,
      "coltype": "SQL_QUERY",
      "use": true,
      "tag": "",
      "name": "SQL_FULLTEXT"
    }
  ]
}"""
    },
    'table_stats': {
      'headers': ['TABLE_NAME', 'NUM_ROWS'],
      'file_headers': """{
  "fileLocation": "%(query_file)s",
  "tenant": "%(tenant)s",
  "fileName": "%(query_file_name)s",
  "sourcePlatform": "%(source_platform)s",
  "colDelim": ",",
  "rowDelim": "\\n",
  "headerFields": [
    {
      "count": 0,
      "coltype": "NONE",
      "use": true,
      "tag": "",
      "name": "TABLE_NAME"
    },
    {
      "count": 0,
      "coltype": "NONE",
      "use": true,
      "tag": "",
      "name": "NUM_ROWS"
    }
  ]
}"""
    },
    'cols_stats': {
      'headers': ['table_name', 'column_name', 'data_type', 'num_distinct', 'num_nulls', 'avg_col_len'], # Lower case for some reason
      'file_headers': """{
  "fileLocation": "%(query_file)s",
  "tenant": "%(tenant)s",
  "fileName": "%(query_file_name)s",
  "sourcePlatform": "%(source_platform)s",
  "colDelim": ",",
  "rowDelim": "\\n",
  "headerFields": [
    {
      "count": 0,
      "coltype": "NONE",
      "use": true,
      "tag": "",
      "name": "table_name"
    },
    {
      "count": 0,
      "coltype": "NONE",
      "use": true,
      "tag": "",
      "name": "column_name"
    },
    {
      "count": 0,
      "coltype": "NONE",
      "use": true,
      "tag": "",
      "name": "data_type"
    },
    {
      "count": 0,
      "coltype": "NONE",
      "use": true,
      "tag": "",
      "name": "num_distinct"
    },
    {
      "count": 0,
      "coltype": "NONE",
      "use": true,
      "tag": "",
      "name": "num_nulls"
    },
    {
      "count": 0,
      "coltype": "NONE",
      "use": true,
      "tag": "",
      "name": "avg_col_len"
    }
  ]
}"""
    }
  }

  def __init__(self, api_url=None, product_name=None, product_secret=None, ssl_cert_ca_verify=OPTIMIZER.SSL_CERT_CA_VERIFY.get(), product_auth_secret=None):
    """
    Explicit arguments win; otherwise values fall back to the OPTIMIZER
    configuration section.
    """
    self._api_url = (api_url or get_optimizer_url()).strip('/')
    self._email = OPTIMIZER.EMAIL.get()
    self._email_password = OPTIMIZER.EMAIL_PASSWORD.get()
    self._product_secret = product_secret if product_secret else OPTIMIZER.PRODUCT_SECRET.get()
    self._product_auth_secret = product_auth_secret if product_auth_secret else OPTIMIZER.PRODUCT_AUTH_SECRET.get()
    # When no product name is configured, ask the service for the tenant —
    # note this performs a remote call from the constructor.
    self._product_name = product_name if product_name else (OPTIMIZER.PRODUCT_NAME.get() or self.get_tenant()['tenant']) # Aka "workload"

    self._client = HttpClient(self._api_url, logger=LOG)
    self._client.set_verify(ssl_cert_ca_verify)
    self._root = resource.Resource(self._client)

    # Auth token cache, filled lazily by _authenticate().
    self._token = None

  def _authenticate(self, force=False):
    # Fetch and cache the auth token; force=True refreshes it unconditionally.
    if self._token is None or force:
      self._token = self.authenticate()['token']

    return self._token

  def _exec(self, command, args):
    """
    Run a 'ccs navopt' CLI subcommand against the configured endpoint.

    NOTE(review): nothing is returned and ``response`` is never read below —
    the tail of this method (parsing/returning ``data``) looks truncated;
    confirm against the upstream source.
    """
    data = None
    response = {'status': 'error'}  # NOTE(review): assigned but unused here

    try:
      cmd_args = [
        'ccs',
        'navopt',
        '--endpoint-url=%s' % self._api_url,
        command
      ]
      if self._product_secret:
        # Pass the auth configuration through to the CLI.
        cmd_args += ['--auth-config', self._product_secret]

      LOG.info(' '.join(cmd_args + args))
      data = subprocess.check_output(cmd_args + args)
    except CalledProcessError, e:
      if command == 'upload' and e.returncode == 1:
        # The upload subcommand exits 1 even on success; salvage its output.
        LOG.info('Upload command is successful despite return code of 1: %s' % e.output)
        data = '\n'.join(e.output.split('\n')[3:]) # Beware removing of {"url":...}
      else:
        raise OptimizerApiException(e, title=_('Error while accessing Optimizer'))
    except RestException, e:
      raise OptimizerApiException(e, title=_('Error while accessing Optimizer'))
class SolrApi(object): """ http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler """ def __init__(self, solr_url=None, user=None, security_enabled=False, ssl_cert_ca_verify=SSL_CERT_CA_VERIFY.get()): if solr_url is None and hasattr(SOLR_URL, 'get'): solr_url = SOLR_URL.get() if solr_url: self._url = solr_url self._user = user self._client = HttpClient(self._url, logger=LOG) self.security_enabled = security_enabled or SECURITY_ENABLED.get() if self.security_enabled: self._client.set_kerberos_auth() self._client.set_verify(ssl_cert_ca_verify) self._root = resource.Resource(self._client) # The Kerberos handshake requires two requests in order to authenticate, # but if our first request is a PUT/POST, it might flat-out reject the # first request if the body is too large. So, connect here in order to get # a cookie so future PUT/POSTs will be pre-authenticated. if self.security_enabled: self._root.invoke('HEAD', '/') def query(self, collection, query): solr_query = {} json_facets = {} solr_query['collection'] = collection['name'] if query.get('download'): solr_query['rows'] = 1000 solr_query['start'] = 0 else: solr_query['rows'] = int(collection['template']['rows'] or 10) solr_query['start'] = int(query['start']) solr_query['rows'] = min(solr_query['rows'], 1000) solr_query['start'] = min(solr_query['start'], 10000) params = self._get_params() + ( ('q', self._get_q(query)), ('wt', 'json'), ('rows', solr_query['rows']), ('start', solr_query['start']), ) if any(collection['facets']): params += ( ('facet', 'true'), ('facet.mincount', 0), ('facet.limit', 10), ) timeFilter = self._get_range_borders(collection, query) for facet in collection['facets']: if facet['type'] == 'query': params += (('facet.query', '%s' % facet['field']),) elif facet['type'] == 'range' or facet['type'] == 'range-up': keys = { 'id': '%(id)s' % facet, 'field': facet['field'], 'key': '%(field)s-%(id)s' % facet, 'start': facet['properties']['start'], 'end': facet['properties']['end'], 'gap': 
facet['properties']['gap'], 'mincount': int(facet['properties']['mincount']) } if facet['properties']['canRange'] or timeFilter and timeFilter['time_field'] == facet['field'] and (facet['id'] not in timeFilter['time_filter_overrides'] or facet['widgetType'] != 'histogram-widget'): keys.update(self._get_time_filter_query(timeFilter, facet, collection)) params += ( ('facet.range', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.range.start=%(start)s f.%(field)s.facet.range.end=%(end)s f.%(field)s.facet.range.gap=%(gap)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys), ) elif facet['type'] == 'field': keys = { 'id': '%(id)s' % facet, 'field': facet['field'], 'key': '%(field)s-%(id)s' % facet, 'limit': int(facet['properties'].get('limit', 10)) + (1 if facet['widgetType'] == 'facet-widget' else 0), 'mincount': int(facet['properties']['mincount']) } params += ( ('facet.field', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys), ) elif facet['type'] == 'nested': _f = {} if facet['properties']['facets']: self._n_facet_dimension(facet, _f, facet['properties']['facets'], 1, timeFilter, collection, can_range = facet['properties']['canRange']) if facet['properties'].get('domain'): if facet['properties']['domain'].get('blockParent') or facet['properties']['domain'].get('blockChildren'): _f['domain'] = {} if facet['properties']['domain'].get('blockParent'): _f['domain']['blockParent'] = ' OR '.join(facet['properties']['domain']['blockParent']) if facet['properties']['domain'].get('blockChildren'): _f['domain']['blockChildren'] = ' OR '.join(facet['properties']['domain']['blockChildren']) if _f: sort = {'count': facet['properties']['facets'][0]['sort']} for i, agg in enumerate(self._get_dimension_aggregates(facet['properties']['facets'][1:])): if agg['sort'] != 'default': agg_function = self._get_aggregate_function(agg) sort = {'agg_%02d_%02d:%s' % (1, i, agg_function): agg['sort']} if sort.get('count') == 
'default': sort['count'] = 'desc' dim_key = [key for key in list(_f['facet'].keys()) if 'dim' in key][0] _f['facet'][dim_key].update({ 'excludeTags': facet['id'], 'offset': 0, 'numBuckets': True, 'allBuckets': True, 'sort': sort #'prefix': '' # Forbidden on numeric fields }) json_facets[facet['id']] = _f['facet'][dim_key] elif facet['type'] == 'function': if facet['properties']['facets']: json_facets[facet['id']] = self._get_aggregate_function(facet['properties']['facets'][0]) if facet['properties']['compare']['is_enabled']: # TODO: global compare override unit = re.split('\d+', facet['properties']['compare']['gap'])[1] json_facets[facet['id']] = { 'type': 'range', 'field': collection['timeFilter'].get('field'), 'start': 'NOW/%s-%s-%s' % (unit, facet['properties']['compare']['gap'], facet['properties']['compare']['gap']), 'end': 'NOW/%s' % unit, 'gap': '+%(gap)s' % facet['properties']['compare'], 'facet': {facet['id']: json_facets[facet['id']]} } if facet['properties']['filter']['is_enabled']: json_facets[facet['id']] = { 'type': 'query', 'q': facet['properties']['filter']['query'] or EMPTY_QUERY.get(), 'facet': {facet['id']: json_facets[facet['id']]} } json_facets['processEmpty'] = True elif facet['type'] == 'pivot': if facet['properties']['facets'] or facet['widgetType'] == 'map-widget': fields = facet['field'] fields_limits = [] for f in facet['properties']['facets']: fields_limits.append('f.%s.facet.limit=%s' % (f['field'], f['limit'])) fields_limits.append('f.%s.facet.mincount=%s' % (f['field'], f['mincount'])) fields += ',' + f['field'] keys = { 'id': '%(id)s' % facet, 'key': '%(field)s-%(id)s' % facet, 'field': facet['field'], 'fields': fields, 'limit': int(facet['properties'].get('limit', 10)), 'mincount': int(facet['properties']['mincount']), 'fields_limits': ' '.join(fields_limits) } params += ( ('facet.pivot', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s %(fields_limits)s}%(fields)s' % keys), ) params 
+= self._get_fq(collection, query) fl = urllib_unquote(utf_quoter(','.join(Collection2.get_field_list(collection)))) nested_fields = self._get_nested_fields(collection) if nested_fields: fl += urllib_unquote(utf_quoter(',[child parentFilter="%s"]' % ' OR '.join(nested_fields))) if collection['template']['moreLikeThis'] and fl != ['*']: # Potential conflict with nested documents id_field = collection.get('idField', 'id') params += ( ('mlt', 'true'), ('mlt.fl', fl.replace(',%s' % id_field, '')), ('mlt.mintf', 1), ('mlt.mindf', 1), ('mlt.maxdf', 50), ('mlt.maxntp', 1000), ('mlt.count', 10), #('mlt.minwl', 1), #('mlt.maxwl', 1), ) fl = '*' params += (('fl', fl),) params += ( ('hl', 'true'), ('hl.fl', '*'), ('hl.snippets', 5), ('hl.fragsize', 1000), ) #if query.get('timezone'): # params += (('TZ', query.get('timezone')),) if collection['template']['fieldsSelected']: fields = [] for field in collection['template']['fieldsSelected']: attribute_field = [attribute for attribute in collection['template']['fieldsAttributes'] if field == attribute['name']] if attribute_field: if attribute_field[0]['sort']['direction']: fields.append('%s %s' % (field, attribute_field[0]['sort']['direction'])) if fields: params += ( ('sort', ','.join(fields)), ) if json_facets: response = self._root.post( '%(collection)s/select' % solr_query, params, data=json.dumps({'facet': json_facets}), contenttype='application/json') else: response = self._root.get('%(collection)s/select' % solr_query, params) return self._get_json(response) def _n_facet_dimension(self, widget, _f, facets, dim, timeFilter, collection, can_range=None): facet = facets[0] f_name = 'dim_%02d:%s' % (dim, facet['field']) if facet['aggregate']['function'] == 'count': if 'facet' not in _f: _f['facet'] = {f_name: {}} else: _f['facet'][f_name] = {} _f = _f['facet'] sort = {'count': facet['sort']} for i, agg in enumerate(self._get_dimension_aggregates(facets)): if agg['sort'] != 'default': agg_function = 
self._get_aggregate_function(agg) sort = {'agg_%02d_%02d:%s' % (dim, i, agg_function): agg['sort']} if sort.get('count') == 'default': sort['count'] = 'desc' _f[f_name] = { 'type': 'terms', 'field': '%(field)s' % facet, 'limit': int(facet.get('limit', 10)), 'numBuckets': True, 'allBuckets': True, 'sort': sort, 'missing': facet.get('missing', False) #'prefix': '' # Forbidden on numeric fields } if int(facet['mincount']): _f[f_name]['mincount'] = int(facet['mincount']) # Forbidden on n > 0 field if mincount = 0 if 'start' in facet and not facet.get('type') == 'field': _f[f_name].update({ 'type': 'range', 'start': facet['start'], 'end': facet['end'], 'gap': facet['gap'] }) # Only on dim 1 currently if can_range or (timeFilter and timeFilter['time_field'] == facet['field'] and (widget['id'] not in timeFilter['time_filter_overrides'])): # or facet['widgetType'] != 'bucket-widget'): facet['widgetType'] = widget['widgetType'] _f[f_name].update(self._get_time_filter_query(timeFilter, facet, collection)) if widget['widgetType'] == 'tree2-widget' and facets[-1]['aggregate']['function'] != 'count': _f['subcount'] = self._get_aggregate_function(facets[-1]) if len(facets) > 1: # Get n+1 dimension if facets[1]['aggregate']['function'] == 'count': self._n_facet_dimension(widget, _f[f_name], facets[1:], dim + 1, timeFilter, collection) else: self._n_facet_dimension(widget, _f[f_name], facets[1:], dim, timeFilter, collection) else: agg_function = self._get_aggregate_function(facet) _f['facet'] = { 'agg_%02d_00:%s' % (dim, agg_function): agg_function } for i, _f_agg in enumerate(facets[1:], 1): if _f_agg['aggregate']['function'] != 'count': agg_function = self._get_aggregate_function(_f_agg) _f['facet']['agg_%02d_%02d:%s' % (dim, i, agg_function)] = agg_function else: self._n_facet_dimension(widget, _f, facets[i:], dim + 1, timeFilter, collection) # Get n+1 dimension break def select(self, collection, query=None, rows=100, start=0): if query is None: query = EMPTY_QUERY.get() params 
= self._get_params() + ( ('q', query), ('wt', 'json'), ('rows', rows), ('start', start), ) response = self._root.get('%s/select' % collection, params) return self._get_json(response) def suggest(self, collection, query): try: params = self._get_params() + ( ('suggest', 'true'), ('suggest.build', 'true'), ('suggest.q', query['q']), ('wt', 'json'), ) if query.get('dictionary'): params += ( ('suggest.dictionary', query['dictionary']), ) response = self._root.get('%s/suggest' % collection, params) return self._get_json(response) except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) def collections(self): # To drop, used in indexer v1 try: params = self._get_params() + ( ('detail', 'true'), ('path', '/clusterstate.json'), ) response = self._root.get('zookeeper', params=params) return json.loads(response['znode'].get('data', '{}')) except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) def collections2(self): try: params = self._get_params() + ( ('action', 'LIST'), ('wt', 'json'), ) return self._root.get('admin/collections', params=params)['collections'] except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) def config(self, name): try: params = self._get_params() + ( ('wt', 'json'), ) response = self._root.get('%s/config' % name, params=params) return self._get_json(response)['config'] except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) def configs(self): try: params = self._get_params() + ( ('action', 'LIST'), ('wt', 'json'), ) return self._root.get('admin/configs', params=params)['configSets'] except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) def create_config(self, name, base_config, immutable=False): try: params = self._get_params() + ( ('action', 'CREATE'), ('name', name), ('baseConfigSet', base_config), ('configSetProp.immutable', immutable), ('wt', 'json'), ) return 
self._root.post('admin/configs', params=params, contenttype='application/json') except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) def delete_config(self, name): response = {'status': -1, 'message': ''} try: params = self._get_params() + ( ('action', 'DELETE'), ('name', name), ('wt', 'json') ) data = self._root.get('admin/configs', params=params) if data['responseHeader']['status'] == 0: response['status'] = 0 else: response['message'] = "Could not remove config: %s" % data except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) return response def list_aliases(self): try: params = self._get_params() + ( ('action', 'LISTALIASES'), ('wt', 'json'), ) return self._root.get('admin/collections', params=params)['aliases'] or [] except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) def collection_or_core(self, hue_collection): if hue_collection.is_core_only: return self.core(hue_collection.name) else: return self.collection(hue_collection.name) def collection(self, name): try: collections = self.collections() return collections[name] except Exception as e: raise PopupException(e, title=_('Error while accessing Solr')) def create_collection2(self, name, config_name=None, shards=1, replication=1, **kwargs): try: params = self._get_params() + ( ('action', 'CREATE'), ('name', name), ('numShards', shards), ('replicationFactor', replication), ('wt', 'json') ) if config_name: params += ( ('collection.configName', config_name), ) if kwargs: params += tuple(((key, val) for key, val in kwargs.items())) response = self._root.post('admin/collections', params=params, contenttype='application/json') response_data = self._get_json(response) if response_data.get('failure'): raise PopupException(_('Collection could not be created: %(failure)s') % response_data) else: return response_data except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) def 
update_config(self, name, properties): try: params = self._get_params() + ( ('wt', 'json'), ) response = self._root.post('%(collection)s/config' % {'collection': name}, params=params, data=json.dumps(properties), contenttype='application/json') return self._get_json(response) except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) def add_fields(self, name, fields): try: params = self._get_params() + ( ('wt', 'json'), ) data = {'add-field': fields} response = self._root.post('%(collection)s/schema' % {'collection': name}, params=params, data=json.dumps(data), contenttype='application/json') return self._get_json(response) except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) def create_core(self, name, instance_dir, shards=1, replication=1): try: params = self._get_params() + ( ('action', 'CREATE'), ('name', name), ('instanceDir', instance_dir), ('wt', 'json'), ) response = self._root.post('admin/cores', params=params, contenttype='application/json') if response.get('responseHeader', {}).get('status', -1) == 0: return True else: LOG.error("Could not create core. 
Check response:\n%s" % json.dumps(response, indent=2)) return False except RestException as e: if 'already exists' in e.message: LOG.warn("Could not create collection.", exc_info=True) return False else: raise PopupException(e, title=_('Error while accessing Solr')) def create_alias(self, name, collections): try: params = self._get_params() + ( ('action', 'CREATEALIAS'), ('name', name), ('collections', ','.join(collections)), ('wt', 'json'), ) response = self._root.post('admin/collections', params=params, contenttype='application/json') if response.get('responseHeader', {}).get('status', -1) != 0: raise PopupException(_("Could not create or edit alias: %s") % response) else: return response except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) def delete_alias(self, name): try: params = self._get_params() + ( ('action', 'DELETEALIAS'), ('name', name), ('wt', 'json'), ) response = self._root.post('admin/collections', params=params, contenttype='application/json') if response.get('responseHeader', {}).get('status', -1) != 0: msg = _("Could not delete alias. 
Check response:\n%s") % json.dumps(response, indent=2) LOG.error(msg) raise PopupException(msg) except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) def delete_collection(self, name): response = {'status': -1, 'message': ''} try: params = self._get_params() + ( ('action', 'DELETE'), ('name', name), ('wt', 'json') ) data = self._root.post('admin/collections', params=params, contenttype='application/json') if data['responseHeader']['status'] == 0: response['status'] = 0 else: response['message'] = "Could not remove collection: %s" % data except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) return response def remove_core(self, name): try: params = self._get_params() + ( ('action', 'UNLOAD'), ('name', name), ('deleteIndex', 'true'), ('wt', 'json') ) response = self._root.post('admin/cores', params=params, contenttype='application/json') if 'success' in response: return True else: LOG.error("Could not remove core. Check response:\n%s" % json.dumps(response, indent=2)) return False except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) def cores(self): try: params = self._get_params() + ( ('wt', 'json'), ) return self._root.get('admin/cores', params=params)['status'] except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) def core(self, core): try: params = self._get_params() + ( ('wt', 'json'), ('core', core), ) return self._root.get('admin/cores', params=params) except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) def get_schema(self, collection): try: params = self._get_params() + ( ('wt', 'json'), ) response = self._root.get('%(core)s/schema' % {'core': collection}, params=params) return self._get_json(response)['schema'] except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) # Deprecated def schema(self, core): try: params = self._get_params() + ( ('wt', 
          # NOTE(review): the lines below are the tail of a method whose head is
          # above this chunk (a schema-file fetch against '<core>/admin/file');
          # only its closing params and error handling are visible here.
          'json'), ('file', 'schema.xml'),
      )
      return self._root.get('%(core)s/admin/file' % {'core': core}, params=params)
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))

  def fields(self, core, dynamic=False):
    """Fetch field metadata for `core` via the Luke handler.

    When `dynamic` is False, only the statically declared schema fields are
    requested (`show=schema`); otherwise dynamic fields are included too.
    Raises PopupException on any Solr REST error.
    """
    try:
      params = self._get_params() + (
          ('wt', 'json'),
          ('fl', '*'),
      )
      if not dynamic:
        params += (('show', 'schema'),)
      response = self._root.get('%(core)s/admin/luke' % {'core': core}, params=params)
      return self._get_json(response)
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))

  def luke(self, core):
    """Return the raw Luke handler response (index/field details) for `core`."""
    try:
      params = self._get_params() + (
          ('wt', 'json'),
      )
      response = self._root.get('%(core)s/admin/luke' % {'core': core}, params=params)
      return self._get_json(response)
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))

  def schema_fields(self, core):
    """Return the declared schema fields of `core`.

    Each field matching the schema's uniqueKey is tagged with
    `primary_key: 'true'` (string, not bool — consumed as-is by the UI).
    """
    try:
      params = self._get_params() + (
          ('wt', 'json'),
      )
      response = self._root.get('%(core)s/schema' % {'core': core}, params=params)
      response_json = self._get_json(response)
      fields = response_json['schema']['fields']
      if response_json['schema'].get('uniqueKey'):
        for field in fields:
          if field['name'] == response_json['schema']['uniqueKey']:
            field['primary_key'] = 'true'
      return {
          'fields': fields,
          'responseHeader': response_json['responseHeader']
      }
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))

  def stats(self, core, fields, query=None, facet=''):
    """Run a stats query (`stats=true`, rows=0) over `fields` on `core`.

    `query` (optional dashboard query dict) contributes the main `q` and
    filter queries; `facet` optionally adds a `stats.facet` dimension.
    """
    try:
      params = self._get_params() + (
          ('q', self._get_q(query) if query is not None else EMPTY_QUERY.get()),
          ('wt', 'json'),
          ('rows', 0),
          ('stats', 'true'),
      )
      if query is not None:
        params += self._get_fq(None, query)
      if facet:
        params += (('stats.facet', facet),)
      params += tuple([('stats.field', field) for field in fields])
      response = self._root.get('%(core)s/select' % {'core': core}, params=params)
      return self._get_json(response)
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))

  def terms(self, core, field, properties=None):
    """Query the terms component for `field` on `core`.

    `properties` is an optional dict of extra `terms.*` request parameters
    forwarded verbatim to Solr.
    """
    try:
      params = self._get_params() + (
          ('wt', 'json'),
          ('rows', 0),
          ('terms.fl', field),
      )
      if properties:
        for key, val in properties.items():
          params += ((key, val),)
      response = self._root.get('%(core)s/terms' % {'core': core}, params=params)
      return self._get_json(response)
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))

  def info_system(self):
    """Return Solr system information (admin/info/system)."""
    try:
      params = self._get_params() + (
          ('wt', 'json'),
      )
      response = self._root.get('admin/info/system', params=params)
      return self._get_json(response)
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))

  def sql(self, collection, statement):
    """Execute a Solr SQL `statement` against `collection`.

    A LIMIT clause is appended when missing, because the SQL handler
    ignores the `rows` parameter.
    NOTE(review): 'rows' appears twice in the params (0 and 100) — the
    duplicate looks unintentional; Solr uses the SQL LIMIT anyway.
    """
    try:
      if 'limit' not in statement.lower(): # rows is not supported
        statement = statement + ' LIMIT 100'
      params = self._get_params() + (
          ('wt', 'json'),
          ('rows', 0),
          ('stmt', statement),
          ('rows', 100),
          ('start', 0),
      )
      response = self._root.get('%(collection)s/sql' % {'collection': collection}, params=params)
      return self._get_json(response)
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))

  def get(self, core, doc_id):
    """Fetch a single document by id via the real-time-get handler.

    `core` is a collection dict (its 'name' key is used), not a name string.
    """
    collection_name = core['name']
    try:
      params = self._get_params() + (
          ('id', doc_id),
          ('wt', 'json'),
      )
      response = self._root.get('%(core)s/get' % {'core': collection_name}, params=params)
      return self._get_json(response)
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))

  def export(self, name, query, fl, sort, rows=100):
    """Stream documents from collection `name` via the /export handler.

    `fl` and `sort` are passed through verbatim; `rows` caps the result.
    """
    try:
      params = self._get_params() + (
          ('q', query),
          ('fl', fl),
          ('sort', sort),
          ('rows', rows),
          ('wt', 'json'),
      )
      response = self._root.get('%(name)s/export' % {'name': name}, params=params)
      return self._get_json(response)
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))

  def update(self, collection_or_core_name, data, content_type='csv', version=None, **kwargs):
    """POST `data` to the collection's /update handler and commit.

    `content_type` must be 'csv' or 'json'; anything else is logged as an
    error (the request is still sent with the given content type).
    `version` enables optimistic concurrency (`_version_`/`versions`).
    Extra keyword args are forwarded as request parameters.
    """
    if content_type == 'csv':
      content_type = 'application/csv'
    elif content_type == 'json':
      content_type = 'application/json'
    else:
      LOG.error("Trying to update collection %s with content type %s. Allowed content types: csv/json" % (collection_or_core_name, content_type))

    params = self._get_params() + (
        ('wt', 'json'),
        ('overwrite', 'true'),
        ('commit', 'true'),
    )
    if version is not None:
      params += (
          ('_version_', version),
          ('versions', 'true')
      )
    if kwargs:
      params += tuple(((key, val) for key, val in kwargs.items()))

    response = self._root.post('%s/update' % collection_or_core_name, contenttype=content_type, params=params, data=data)
    return self._get_json(response)

  # Deprecated
  def aliases(self):
    """Read collection aliases straight from ZooKeeper's /aliases.json node."""
    try:
      params = self._get_params() + ( # Waiting for SOLR-4968
          ('detail', 'true'),
          ('path', '/aliases.json'),
      )
      response = self._root.get('zookeeper', params=params)
      # The znode payload is itself a JSON document; default to '{}' when empty.
      return json.loads(response['znode'].get('data', '{}')).get('collection', {})
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))

  # Deprecated
  def create_collection(self, name, shards=1, replication=1):
    """Create a SolrCloud collection; returns True on success, False otherwise."""
    try:
      params = self._get_params() + (
          ('action', 'CREATE'),
          ('name', name),
          ('numShards', shards),
          ('replicationFactor', replication),
          ('collection.configName', name),
          ('wt', 'json')
      )
      response = self._root.post('admin/collections', params=params, contenttype='application/json')
      if 'success' in response:
        return True
      else:
        LOG.error("Could not create collection. Check response:\n%s" % json.dumps(response, indent=2))
        return False
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))

  # Deprecated
  def remove_collection(self, name):
    """Delete a SolrCloud collection; returns True on success, False otherwise."""
    try:
      params = self._get_params() + (
          ('action', 'DELETE'),
          ('name', name),
          ('wt', 'json')
      )
      response = self._root.post('admin/collections', params=params, contenttype='application/json')
      if 'success' in response:
        return True
      else:
        LOG.error("Could not remove collection. Check response:\n%s" % json.dumps(response, indent=2))
        return False
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))

  def _get_params(self):
    """Base auth params: impersonation via doAs, plus user.name when not kerberized."""
    if self.security_enabled:
      return (('doAs', self._user),)
    return (('user.name', SERVER_USER.get()), ('doAs', self._user),)

  def _get_q(self, query):
    """Build the main Solr `q` string by OR-joining the dashboard sub-queries.

    Each sub-query is parenthesized only when there are two or more, e.g.
    '(a)OR(b)'. Empty sub-queries fall back to EMPTY_QUERY. Returns bytes
    (utf-8 encoded).
    """
    q_template = '(%s)' if len(query['qs']) >= 2 else '%s'
    return 'OR'.join([q_template % (q['q'] or EMPTY_QUERY.get()) for q in query['qs']]).encode('utf-8')

  @classmethod
  def _get_aggregate_function(cls, facet):
    """Translate a facet's aggregate descriptor into a Solr function string.

    'formula' and 'field' aggregates return their raw value; 'median' is
    rewritten to percentile(field,50). NOTE: mutates facet['aggregate']
    in place for median/percentile.
    """
    f = facet['aggregate']

    if f['function'] == 'formula':
      return f['formula']
    elif f['function'] == 'field':
      return f['value']
    else:
      fields = [facet['field']]
      if f['function'] == 'median':
        f['function'] = 'percentile'
        fields.append('50')
      elif f['function'] == 'percentile':
        fields.append(str(f['percentile']))
        f['function'] = 'percentile'
      return '%s(%s)' % (f['function'], ','.join(fields))

  def _get_range_borders(self, collection, query):
    """Derive the active time-filter window (field, from, to, gap) for a query.

    Returns an empty dict when no time filter applies. The 'rolling' type
    produces NOW-relative bounds; 'fixed' uses explicit from/to (defaulting
    to the last 7 days). `time_filter_overrides` is intentionally left
    empty here (overriding by facet fqs was disabled).
    """
    props = {}

    time_field = collection['timeFilter'].get('field')

    if time_field and (collection['timeFilter']['value'] != 'all' or collection['timeFilter']['type'] == 'fixed'):
      # fqs overrides main time filter # No longer override
      props['time_filter_overrides'] = []
      props['time_field'] = time_field

      if collection['timeFilter']['type'] == 'rolling':
        props['field'] = collection['timeFilter']['field']
        props['from'] = 'NOW-%s' % collection['timeFilter']['value']
        props['to'] = 'NOW'
        props['gap'] = GAPS.get(collection['timeFilter']['value'])
      elif collection['timeFilter']['type'] == 'fixed':
        props['field'] = collection['timeFilter']['field']
        props['from'] = collection['timeFilter'].get('from', 'NOW-7DAYS')
        props['to'] = collection['timeFilter'].get('to', 'NOW')
        props['fixed'] = True

    return props

  def _get_time_filter_query(self, timeFilter, facet, collection):
    """Compute start/end/gap/min/max range-facet bounds for a time facet.

    When `timeFilter` is set, bounds come from it; otherwise the field's
    current min/max are fetched via stats(). In both branches, if the
    facet's start/end equal its min/max the user has not zoomed, so the
    full domain is shown (start/end reset to None before computing).
    NOTE: mutates facet['properties'] start/end in place.
    """
    properties = facet.get('properties', facet)

    if timeFilter:
      props = {}
      # If the start & end are equal to min/max, then we want to show the whole domain (either interval now-x or static)
      # In that case use timeFilter values
      if properties['start'] == properties['min'] and properties['end'] == properties['max']:
        stat_facet = {'min': timeFilter['from'], 'max': timeFilter['to']}
        properties['start'] = None
        properties['end'] = None
      else: # The user has zoomed in. Only show that section.
        stat_facet = {'min': properties['min'], 'max': properties['max']}
      _compute_range_facet(facet['widgetType'], stat_facet, props, properties['start'], properties['end'], SLOTS=properties['slot'])
      # NOTE(review): `gap` is assigned but unused in this branch.
      gap = props['gap']
      return {
          'min': '%(min)s' % props,
          'max': '%(max)s' % props,
          'start': '%(start)s' % props,
          'end': '%(end)s' % props,
          'gap': '%(gap)s' % props,
      }
    else:
      props = {}
      # If the start & end are equal to min/max, then we want to show the whole domain. Since min/max can change, we fetch latest values and update start/end
      if properties['start'] == properties['min'] and properties['end'] == properties['max']:
        stats_json = self.stats(collection['name'], [facet['field']])
        stat_facet = stats_json['stats']['stats_fields'][facet['field']]
        properties['start'] = None
        properties['end'] = None
      else: # the user has zoomed in. Only show that section.
        stat_facet = {'min': properties['min'], 'max': properties['max']}
      _compute_range_facet(facet['widgetType'], stat_facet, props, properties['start'], properties['end'], SLOTS = properties['slot'])
      return {
          'start': '%(start)s' % props,
          'end': '%(end)s' % props,
          'gap': '%(gap)s' % props,
          'min': '%(min)s' % props,
          'max': '%(max)s' % props,
      }

  def _get_fq(self, collection, query):
    """Build the tuple of `fq` request parameters for a dashboard query.

    Handles the main time filter, merges filters that target the same
    (type, field), then renders field / range / range-up / map filters.
    Each fq is tagged `{!tag=<facet id>}` so facets can exclude it.
    """
    params = ()
    timeFilter = {}

    if collection:
      timeFilter = self._get_range_borders(collection, query)
    if timeFilter and not timeFilter.get('time_filter_overrides'):
      params += (('fq', urllib_unquote(utf_quoter('%(field)s:[%(from)s TO %(to)s]' % timeFilter))),)

    # Merge facets queries on same fields
    grouped_fqs = groupby(query['fqs'], lambda x: (x['type'], x['field']))
    merged_fqs = []
    for key, group in grouped_fqs:
      field_fq = next(group)
      for fq in group:
        for f in fq['filter']:
          field_fq['filter'].append(f)
      merged_fqs.append(field_fq)

    for fq in merged_fqs:
      if fq['type'] == 'field':
        fields = fq['field'] if type(fq['field']) == list else [fq['field']] # 2D facets support
        for field in fields:
          f = []
          for _filter in fq['filter']:
            values = _filter['value'] if type(_filter['value']) == list else [_filter['value']] # 2D facets support
            if fields.index(field) < len(values): # Lowest common field denominator
              value = values[fields.index(field)]
              if value or value is False:
                exclude = '-' if _filter['exclude'] else ''
                if value is not None and ' ' in force_unicode(value):
                  # Quote values containing spaces (and escape embedded quotes).
                  value = force_unicode(value).replace('"', '\\"')
                  f.append('%s%s:"%s"' % (exclude, field, value))
                else:
                  f.append('%s{!field f=%s}%s' % (exclude, field, value))
              else:
                # Handle empty value selection that are returned using solr facet.missing
                value = "*"
                exclude = '-'
                f.append('%s%s:%s' % (exclude, field, value))
          _params = '{!tag=%(id)s}' % fq + ' '.join(f)
          params += (('fq', urllib_unquote(utf_quoter(_params))),)
      elif fq['type'] == 'range':
        params += (('fq', '{!tag=%(id)s}' % fq + ' '.join([urllib_unquote(
            utf_quoter('%s%s:[%s TO %s}' % ('-' if field['exclude'] else '', fq['field'], f['from'], f['to'])))
            for field, f in zip(fq['filter'], fq['properties'])])),)
      elif fq['type'] == 'range-up':
        params += (('fq', '{!tag=%(id)s}' % fq + ' '.join([urllib_unquote(
            utf_quoter('%s%s:[%s TO %s}' % ('-' if field['exclude'] else '', fq['field'],
                                            f['from'] if fq['is_up'] else '*',
                                            '*' if fq['is_up'] else f['from'])))
            for field, f in zip(fq['filter'], fq['properties'])])),)
      elif fq['type'] == 'map':
        _keys = fq.copy()
        _keys.update(fq['properties'])
        params += (('fq', '{!tag=%(id)s}' % fq + urllib_unquote(
            utf_quoter('%(lat)s:[%(lat_sw)s TO %(lat_ne)s} AND %(lon)s:[%(lon_sw)s TO %(lon_ne)s}' % _keys))),)

    nested_fields = self._get_nested_fields(collection)
    if nested_fields:
      params += (('fq', urllib_unquote(utf_quoter(' OR '.join(nested_fields)))),)

    return params

  def _get_dimension_aggregates(self, facets):
    """Return the leading run of non-count aggregates from `facets`.

    Iteration stops at the first 'count' aggregate (everything after it
    is ignored by design).
    """
    aggregates = []
    for agg in facets:
      if agg['aggregate']['function'] != 'count':
        aggregates.append(agg)
      else:
        return aggregates
    return aggregates

  def _get_nested_fields(self, collection):
    """Return the filter strings of selected nested-schema fields, if enabled."""
    if collection and collection.get('nested') and collection['nested']['enabled']:
      return [field['filter'] for field in self._flatten_schema(collection['nested']['schema']) if field['selected']]
    else:
      return []

  def _flatten_schema(self, level):
    """Depth-first flatten of a nested schema tree into a single list."""
    fields = []
    for field in level:
      fields.append(field)
      if field['values']:
        fields.extend(self._flatten_schema(field['values']))
    return fields

  @classmethod
  def _get_json(cls, response):
    """Coerce a Solr response into a dict, parsing JSON text when needed."""
    if type(response) != dict:
      # Got 'plain/text' mimetype instead of 'application/json'
      try:
        response = json.loads(response)
      except ValueError as e:
        # Got some null bytes in the response
        LOG.error('%s: %s' % (new_str(e), repr(response)))
        response = json.loads(response.replace('\x00', ''))
    return response

  def uniquekey(self, collection):
    """Return the uniqueKey field name declared in `collection`'s schema."""
    try:
      params = self._get_params() + (
          ('wt', 'json'),
      )
      response = self._root.get('%s/schema/uniquekey' % collection, params=params)
      return self._get_json(response)['uniqueKey']
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))
class SolrApi(object):
  """
  Dashboard-oriented Solr client.

  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """

  def __init__(self, solr_url, user, security_enabled=SECURITY_ENABLED.get(), ssl_cert_ca_verify=SSL_CERT_CA_VERIFY.get()):
    self._url = solr_url
    self._user = user
    self._client = HttpClient(self._url, logger=LOG)
    self.security_enabled = security_enabled

    if self.security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

    self._root = resource.Resource(self._client)

    # The Kerberos handshake requires two requests in order to authenticate,
    # but if our first request is a PUT/POST, it might flat-out reject the
    # first request if the body is too large. So, connect here in order to get
    # a cookie so future PUT/POSTs will be pre-authenticated.
    if self.security_enabled:
      self._root.invoke('HEAD', '/')

  def _get_params(self):
    """Base auth params: impersonation via doAs, plus user.name when not kerberized."""
    if self.security_enabled:
      return (('doAs', self._user),)
    return (('user.name', SERVER_USER.get()), ('doAs', self._user),)

  def _get_q(self, query):
    """OR-join the dashboard sub-queries into the main `q`; returns utf-8 bytes."""
    q_template = '(%s)' if len(query['qs']) >= 2 else '%s'
    return 'OR'.join([q_template % (q['q'] or EMPTY_QUERY.get()) for q in query['qs']]).encode('utf-8')

  def _get_aggregate_function(self, facet):
    """Render a facet aggregate as a Solr function (median -> percentile 50)."""
    props = {
        'field': facet['field'],
        'aggregate': facet['properties']['aggregate'] if 'properties' in facet else facet['aggregate']
    }

    if props['aggregate'] == 'median':
      return 'percentile(%(field)s,50)' % props
    else:
      return '%(aggregate)s(%(field)s)' % props

  def _get_range_borders(self, collection, query):
    """Derive the active time-filter window (field, from, to, gap) for a query.

    Returns an empty dict when no time filter applies. `GAPS` maps each
    rolling window to a per-widget bucket size (coeff + unit), tuned so
    histogram/bucket/bar widgets get ~100 slots and facet widgets ~10.
    Facet fqs on the time field are recorded in `time_filter_overrides`.
    """
    props = {}
    GAPS = {
        '5MINUTES': {
            'histogram-widget': {'coeff': '+3', 'unit': 'SECONDS'}, # ~100 slots
            'bucket-widget': {'coeff': '+3', 'unit': 'SECONDS'}, # ~100 slots
            'bar-widget': {'coeff': '+3', 'unit': 'SECONDS'}, # ~100 slots
            'facet-widget': {'coeff': '+1', 'unit': 'MINUTES'}, # ~10 slots
        },
        '30MINUTES': {
            'histogram-widget': {'coeff': '+20', 'unit': 'SECONDS'},
            'bucket-widget': {'coeff': '+20', 'unit': 'SECONDS'},
            'bar-widget': {'coeff': '+20', 'unit': 'SECONDS'},
            'facet-widget': {'coeff': '+5', 'unit': 'MINUTES'},
        },
        '1HOURS': {
            'histogram-widget': {'coeff': '+30', 'unit': 'SECONDS'},
            'bucket-widget': {'coeff': '+30', 'unit': 'SECONDS'},
            'bar-widget': {'coeff': '+30', 'unit': 'SECONDS'},
            'facet-widget': {'coeff': '+10', 'unit': 'MINUTES'},
        },
        '12HOURS': {
            'histogram-widget': {'coeff': '+7', 'unit': 'MINUTES'},
            'bucket-widget': {'coeff': '+7', 'unit': 'MINUTES'},
            'bar-widget': {'coeff': '+7', 'unit': 'MINUTES'},
            'facet-widget': {'coeff': '+1', 'unit': 'HOURS'},
        },
        '1DAYS': {
            'histogram-widget': {'coeff': '+15', 'unit': 'MINUTES'},
            'bucket-widget': {'coeff': '+15', 'unit': 'MINUTES'},
            'bar-widget': {'coeff': '+15', 'unit': 'MINUTES'},
            'facet-widget': {'coeff': '+3', 'unit': 'HOURS'},
        },
        '2DAYS': {
            'histogram-widget': {'coeff': '+30', 'unit': 'MINUTES'},
            'bucket-widget': {'coeff': '+30', 'unit': 'MINUTES'},
            'bar-widget': {'coeff': '+30', 'unit': 'MINUTES'},
            'facet-widget': {'coeff': '+6', 'unit': 'HOURS'},
        },
        '7DAYS': {
            'histogram-widget': {'coeff': '+3', 'unit': 'HOURS'},
            'bucket-widget': {'coeff': '+3', 'unit': 'HOURS'},
            'bar-widget': {'coeff': '+3', 'unit': 'HOURS'},
            'facet-widget': {'coeff': '+1', 'unit': 'DAYS'},
        },
        '1MONTHS': {
            'histogram-widget': {'coeff': '+12', 'unit': 'HOURS'},
            'bucket-widget': {'coeff': '+12', 'unit': 'HOURS'},
            'bar-widget': {'coeff': '+12', 'unit': 'HOURS'},
            'facet-widget': {'coeff': '+5', 'unit': 'DAYS'},
        },
        '3MONTHS': {
            'histogram-widget': {'coeff': '+1', 'unit': 'DAYS'},
            'bucket-widget': {'coeff': '+1', 'unit': 'DAYS'},
            'bar-widget': {'coeff': '+1', 'unit': 'DAYS'},
            'facet-widget': {'coeff': '+30', 'unit': 'DAYS'},
        },
        '1YEARS': {
            'histogram-widget': {'coeff': '+3', 'unit': 'DAYS'},
            'bucket-widget': {'coeff': '+3', 'unit': 'DAYS'},
            'bar-widget': {'coeff': '+3', 'unit': 'DAYS'},
            'facet-widget': {'coeff': '+12', 'unit': 'MONTHS'},
        },
        '2YEARS': {
            'histogram-widget': {'coeff': '+7', 'unit': 'DAYS'},
            'bucket-widget': {'coeff': '+7', 'unit': 'DAYS'},
            'bar-widget': {'coeff': '+7', 'unit': 'DAYS'},
            'facet-widget': {'coeff': '+3', 'unit': 'MONTHS'},
        },
        '10YEARS': {
            'histogram-widget': {'coeff': '+1', 'unit': 'MONTHS'},
            'bucket-widget': {'coeff': '+1', 'unit': 'MONTHS'},
            'bar-widget': {'coeff': '+1', 'unit': 'MONTHS'},
            'facet-widget': {'coeff': '+1', 'unit': 'YEARS'},
        }
    }

    time_field = collection['timeFilter'].get('field')

    if time_field and (collection['timeFilter']['value'] != 'all' or collection['timeFilter']['type'] == 'fixed'):
      # fqs overrides main time filter
      fq_time_ids = [fq['id'] for fq in query['fqs'] if fq['field'] == time_field]
      props['time_filter_overrides'] = fq_time_ids
      props['time_field'] = time_field

      if collection['timeFilter']['type'] == 'rolling':
        props['field'] = collection['timeFilter']['field']
        props['from'] = 'NOW-%s' % collection['timeFilter']['value']
        props['to'] = 'NOW'
        props['gap'] = GAPS.get(collection['timeFilter']['value'])
      elif collection['timeFilter']['type'] == 'fixed':
        props['field'] = collection['timeFilter']['field']
        props['from'] = collection['timeFilter']['from']
        props['to'] = collection['timeFilter']['to']
        props['fixed'] = True

    return props

  def _get_time_filter_query(self, timeFilter, facet):
    """Compute start/end/gap for a time range facet.

    'fixed' windows compute the gap via _compute_range_facet and round the
    bounds to the gap's unit; rolling windows use the widget's precomputed
    (coeff, unit) entry from GAPS.
    """
    if 'fixed' in timeFilter:
      props = {}
      stat_facet = {'min': timeFilter['from'], 'max': timeFilter['to']}
      _compute_range_facet(facet['widgetType'], stat_facet, props, stat_facet['min'], stat_facet['max'])
      gap = props['gap']
      # Raw string for the regex (avoids the invalid '\d' string escape).
      unit = re.split(r'\d+', gap)[1]
      return {
          'start': '%(from)s/%(unit)s' % {'from': timeFilter['from'], 'unit': unit},
          'end': '%(to)s/%(unit)s' % {'to': timeFilter['to'], 'unit': unit},
          'gap': '%(gap)s' % props, # add a 'auto'
      }
    else:
      gap = timeFilter['gap'][facet['widgetType']]
      return {
          'start': '%(from)s/%(unit)s' % {'from': timeFilter['from'], 'unit': gap['unit']},
          'end': '%(to)s/%(unit)s' % {'to': timeFilter['to'], 'unit': gap['unit']},
          'gap': '%(coeff)s%(unit)s/%(unit)s' % gap, # add a 'auto'
      }

  def _get_fq(self, collection, query):
    """Build the tuple of `fq` request parameters for a dashboard query.

    Applies the main time filter (unless overridden by a facet fq on the
    time field), merges filters on the same (type, field), then renders
    field / range / range-up / map filters, each tagged `{!tag=<id>}`.
    NOTE(review): `urllib.unquote` is Python-2-only; kept as-is since the
    file's import block is not visible here — confirm a py2/3 shim exists
    before porting.
    """
    params = ()
    timeFilter = {}

    if collection:
      timeFilter = self._get_range_borders(collection, query)
    if timeFilter and not timeFilter.get('time_filter_overrides'):
      params += (('fq', urllib.unquote(utf_quoter('%(field)s:[%(from)s TO %(to)s]' % timeFilter))),)

    # Merge facets queries on same fields
    grouped_fqs = groupby(query['fqs'], lambda x: (x['type'], x['field']))
    merged_fqs = []
    for key, group in grouped_fqs:
      field_fq = next(group)
      for fq in group:
        for f in fq['filter']:
          field_fq['filter'].append(f)
      merged_fqs.append(field_fq)

    for fq in merged_fqs:
      if fq['type'] == 'field':
        fields = fq['field'] if type(fq['field']) == list else [fq['field']] # 2D facets support
        for field in fields:
          f = []
          for _filter in fq['filter']:
            values = _filter['value'] if type(_filter['value']) == list else [_filter['value']] # 2D facets support
            if fields.index(field) < len(values): # Lowest common field denominator
              value = values[fields.index(field)]
              exclude = '-' if _filter['exclude'] else ''
              if value is not None and ' ' in force_unicode(value):
                # Quote values containing spaces (and escape embedded quotes).
                value = force_unicode(value).replace('"', '\\"')
                f.append('%s%s:"%s"' % (exclude, field, value))
              else:
                f.append('%s{!field f=%s}%s' % (exclude, field, value))
          _params = '{!tag=%(id)s}' % fq + ' '.join(f)
          params += (('fq', urllib.unquote(utf_quoter(_params))),)
      elif fq['type'] == 'range':
        params += (('fq', '{!tag=%(id)s}' % fq + ' '.join([urllib.unquote(
            utf_quoter('%s%s:[%s TO %s}' % ('-' if field['exclude'] else '', fq['field'], f['from'], f['to'])))
            for field, f in zip(fq['filter'], fq['properties'])])),)
      elif fq['type'] == 'range-up':
        params += (('fq', '{!tag=%(id)s}' % fq + ' '.join([urllib.unquote(
            utf_quoter('%s%s:[%s TO %s}' % ('-' if field['exclude'] else '', fq['field'],
                                            f['from'] if fq['is_up'] else '*',
                                            '*' if fq['is_up'] else f['from'])))
            for field, f in zip(fq['filter'], fq['properties'])])),)
      elif fq['type'] == 'map':
        _keys = fq.copy()
        _keys.update(fq['properties'])
        params += (('fq', '{!tag=%(id)s}' % fq + urllib.unquote(
            utf_quoter('%(lat)s:[%(lat_sw)s TO %(lat_ne)s} AND %(lon)s:[%(lon_sw)s TO %(lon_ne)s}' % _keys))),)

    return params

  def query(self, collection, query):
    """Execute a dashboard query: main q, facets (classic + JSON), fqs,
    field list, highlighting and sorting. Returns the parsed response dict.
    """
    solr_query = {}
    solr_query['collection'] = collection['name']

    if query.get('download'):
      solr_query['rows'] = 1000
      solr_query['start'] = 0
    else:
      solr_query['rows'] = int(collection['template']['rows'] or 10)
      solr_query['start'] = int(query['start'])

    # Hard caps to protect the server.
    solr_query['rows'] = min(solr_query['rows'], 1000)
    solr_query['start'] = min(solr_query['start'], 10000)

    params = self._get_params() + (
        ('q', self._get_q(query)),
        ('wt', 'json'),
        ('rows', solr_query['rows']),
        ('start', solr_query['start']),
    )

    if any(collection['facets']):
      params += (
          ('facet', 'true'),
          ('facet.mincount', 0),
          ('facet.limit', 10),
      )
      json_facets = {}

      timeFilter = self._get_range_borders(collection, query)

      for facet in collection['facets']:
        if facet['type'] == 'query':
          params += (('facet.query', '%s' % facet['field']),)
        elif facet['type'] == 'range' or facet['type'] == 'range-up':
          keys = {
              'id': '%(id)s' % facet,
              'field': facet['field'],
              'key': '%(field)s-%(id)s' % facet,
              'start': facet['properties']['start'],
              'end': facet['properties']['end'],
              'gap': facet['properties']['gap'],
              'mincount': int(facet['properties']['mincount'])
          }

          if timeFilter and timeFilter['time_field'] == facet['field'] and (
              facet['id'] not in timeFilter['time_filter_overrides'] or facet['widgetType'] != 'histogram-widget'):
            keys.update(self._get_time_filter_query(timeFilter, facet))

          params += (('facet.range', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.range.start=%(start)s f.%(field)s.facet.range.end=%(end)s f.%(field)s.facet.range.gap=%(gap)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys),)
        elif facet['type'] == 'field':
          keys = {
              'id': '%(id)s' % facet,
              'field': facet['field'],
              'key': '%(field)s-%(id)s' % facet,
              # facet-widget asks for one extra row to detect "has more".
              'limit': int(facet['properties'].get('limit', 10)) + (1 if facet['widgetType'] == 'facet-widget' else 0),
              'mincount': int(facet['properties']['mincount'])
          }

          params += (('facet.field', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys),)
        elif facet['type'] == 'nested':
          _f = {
              'field': facet['field'],
              'limit': int(facet['properties'].get('limit', 10)) + (1 if facet['widgetType'] == 'facet-widget' else 0),
              'mincount': int(facet['properties']['mincount'])
          }

          if 'start' in facet['properties']:
            _f.update({
                'type': 'range',
                'start': facet['properties']['start'],
                'end': facet['properties']['end'],
                'gap': facet['properties']['gap'],
            })
            if timeFilter and timeFilter['time_field'] == facet['field'] and (
                facet['id'] not in timeFilter['time_filter_overrides'] or facet['widgetType'] != 'bucket-widget'):
              _f.update(self._get_time_filter_query(timeFilter, facet))
          else:
            _f.update({
                'type': 'terms',
                'field': facet['field'],
                'excludeTags': facet['id']
            })

          if facet['properties']['facets']:
            if facet['properties']['facets'][0]['aggregate'] == 'count':
              _f['facet'] = {
                  'd2': {
                      'type': 'terms',
                      'field': '%(field)s' % facet['properties']['facets'][0],
                      'limit': int(facet['properties']['facets'][0].get('limit', 10)),
                      'mincount': int(facet['properties']['facets'][0]['mincount'])
                  }
              }
              if len(facet['properties']['facets']) > 1: # Get 3rd dimension calculation
                _f['facet']['d2']['facet'] = {
                    'd2': self._get_aggregate_function(facet['properties']['facets'][1])
                }
            else:
              _f['facet'] = {
                  'd2': self._get_aggregate_function(facet['properties']['facets'][0])
              }

          json_facets[facet['id']] = _f
        elif facet['type'] == 'function':
          json_facets[facet['id']] = self._get_aggregate_function(facet)
          json_facets['processEmpty'] = True
        elif facet['type'] == 'pivot':
          if facet['properties']['facets'] or facet['widgetType'] == 'map-widget':
            fields = facet['field']
            fields_limits = []
            for f in facet['properties']['facets']:
              fields_limits.append('f.%s.facet.limit=%s' % (f['field'], f['limit']))
              fields_limits.append('f.%s.facet.mincount=%s' % (f['field'], f['mincount']))
              fields += ',' + f['field']
            keys = {
                'id': '%(id)s' % facet,
                'key': '%(field)s-%(id)s' % facet,
                'field': facet['field'],
                'fields': fields,
                'limit': int(facet['properties'].get('limit', 10)),
                'mincount': int(facet['properties']['mincount']),
                'fields_limits': ' '.join(fields_limits)
            }

            params += (('facet.pivot', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s %(fields_limits)s}%(fields)s' % keys),)

      if json_facets:
        params += (('json.facet', json.dumps(json_facets)),)

    params += self._get_fq(collection, query)

    if collection['template']['fieldsSelected'] and collection['template']['isGridLayout']:
      # Fix: parenthesize the conditional so the selected fields are kept
      # even when there is no idField (the old `a + [id] if id else []`
      # discarded fieldsSelected entirely when idField was falsy).
      fields = set(collection['template']['fieldsSelected'] + ([collection['idField']] if collection['idField'] else [])) # Add field if needed
      if collection['template']['leafletmap'].get('latitudeField'):
        fields.add(collection['template']['leafletmap']['latitudeField'])
      if collection['template']['leafletmap'].get('longitudeField'):
        fields.add(collection['template']['leafletmap']['longitudeField'])
      if collection['template']['leafletmap'].get('labelField'):
        fields.add(collection['template']['leafletmap']['labelField'])
      params += (('fl', urllib.unquote(utf_quoter(','.join(list(fields))))),)
    else:
      params += (('fl', '*'),)

    params += (
        ('hl', 'true'),
        ('hl.fl', '*'),
        ('hl.snippets', 5),
        ('hl.fragsize', 1000),
    )

    if collection['template']['fieldsSelected']:
      fields = []
      for field in collection['template']['fieldsSelected']:
        # list() so the result is indexable on Python 3 (filter is lazy there).
        attribute_field = list(filter(lambda attribute: field == attribute['name'], collection['template']['fieldsAttributes']))
        if attribute_field:
          if attribute_field[0]['sort']['direction']:
            fields.append('%s %s' % (field, attribute_field[0]['sort']['direction']))
      if fields:
        params += (('sort', ','.join(fields)),)

    response = self._root.get('%(collection)s/select' % solr_query, params)
    return self._get_json(response)

  def suggest(self, collection, query):
    """Query the suggester component of `collection` for query['q'].

    Optionally selects a suggester dictionary. Raises PopupException on
    any Solr REST error.
    """
    try:
      params = self._get_params() + (
          ('suggest', 'true'),
          ('suggest.build', 'true'),
          ('suggest.q', query['q']),
          ('wt', 'json'),
      )
      if query.get('dictionary'):
        params += (('suggest.dictionary', query['dictionary']),)
      response = self._root.get('%s/suggest' % collection, params)
      return self._get_json(response)
    except RestException as e: # py3-compatible except syntax (was `except RestException, e`)
      raise PopupException(e, title=_('Error while accessing Solr'))

  @classmethod
  def _get_json(cls, response):
    """Coerce a Solr response into a dict, parsing JSON text when needed.

    Added: query() and suggest() call self._get_json but the class did not
    define it; this mirrors the sibling implementation earlier in the file
    (including the null-byte retry).
    """
    if type(response) != dict:
      # Got 'plain/text' mimetype instead of 'application/json'
      try:
        response = json.loads(response)
      except ValueError as e:
        # Got some null bytes in the response
        LOG.error('%s: %s' % (e, repr(response)))
        response = json.loads(response.replace('\x00', ''))
    return response
class SolrApi(object): """ http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler """ def __init__( self, solr_url, user, security_enabled=SECURITY_ENABLED.get(), ssl_cert_ca_verify=SSL_CERT_CA_VERIFY.get() ): self._url = solr_url self._user = user self._client = HttpClient(self._url, logger=LOG) self.security_enabled = security_enabled if self.security_enabled: self._client.set_kerberos_auth() self._client.set_verify(ssl_cert_ca_verify) self._root = resource.Resource(self._client) # The Kerberos handshake requires two requests in order to authenticate, # but if our first request is a PUT/POST, it might flat-out reject the # first request if the body is too large. So, connect here in order to get # a cookie so future PUT/POSTs will be pre-authenticated. if self.security_enabled: self._root.invoke("HEAD", "/") def _get_params(self): if self.security_enabled: return (("doAs", self._user),) return (("user.name", DEFAULT_USER), ("doAs", self._user)) def _get_q(self, query): q_template = "(%s)" if len(query["qs"]) >= 2 else "%s" return "OR".join([q_template % (q["q"] or EMPTY_QUERY.get()) for q in query["qs"]]).encode("utf-8") def _get_aggregate_function(self, facet): props = { "field": facet["field"], "aggregate": facet["properties"]["aggregate"] if "properties" in facet else facet["aggregate"], } if props["aggregate"] == "median": return "percentile(%(field)s,50)" % props else: return "%(aggregate)s(%(field)s)" % props def _get_range_borders(self, collection, query): props = {} GAPS = { "5MINUTES": { "histogram-widget": {"coeff": "+3", "unit": "SECONDS"}, # ~100 slots "bucket-widget": {"coeff": "+3", "unit": "SECONDS"}, # ~100 slots "bar-widget": {"coeff": "+3", "unit": "SECONDS"}, # ~100 slots "facet-widget": {"coeff": "+1", "unit": "MINUTES"}, # ~10 slots }, "30MINUTES": { "histogram-widget": {"coeff": "+20", "unit": "SECONDS"}, "bucket-widget": {"coeff": "+20", "unit": "SECONDS"}, "bar-widget": {"coeff": "+20", "unit": "SECONDS"}, "facet-widget": {"coeff": "+5", 
"unit": "MINUTES"}, }, "1HOURS": { "histogram-widget": {"coeff": "+30", "unit": "SECONDS"}, "bucket-widget": {"coeff": "+30", "unit": "SECONDS"}, "bar-widget": {"coeff": "+30", "unit": "SECONDS"}, "facet-widget": {"coeff": "+10", "unit": "MINUTES"}, }, "12HOURS": { "histogram-widget": {"coeff": "+7", "unit": "MINUTES"}, "bucket-widget": {"coeff": "+7", "unit": "MINUTES"}, "bar-widget": {"coeff": "+7", "unit": "MINUTES"}, "facet-widget": {"coeff": "+1", "unit": "HOURS"}, }, "1DAYS": { "histogram-widget": {"coeff": "+15", "unit": "MINUTES"}, "bucket-widget": {"coeff": "+15", "unit": "MINUTES"}, "bar-widget": {"coeff": "+15", "unit": "MINUTES"}, "facet-widget": {"coeff": "+3", "unit": "HOURS"}, }, "2DAYS": { "histogram-widget": {"coeff": "+30", "unit": "MINUTES"}, "bucket-widget": {"coeff": "+30", "unit": "MINUTES"}, "bar-widget": {"coeff": "+30", "unit": "MINUTES"}, "facet-widget": {"coeff": "+6", "unit": "HOURS"}, }, "7DAYS": { "histogram-widget": {"coeff": "+3", "unit": "HOURS"}, "bucket-widget": {"coeff": "+3", "unit": "HOURS"}, "bar-widget": {"coeff": "+3", "unit": "HOURS"}, "facet-widget": {"coeff": "+1", "unit": "DAYS"}, }, "1MONTHS": { "histogram-widget": {"coeff": "+12", "unit": "HOURS"}, "bucket-widget": {"coeff": "+12", "unit": "HOURS"}, "bar-widget": {"coeff": "+12", "unit": "HOURS"}, "facet-widget": {"coeff": "+5", "unit": "DAYS"}, }, "3MONTHS": { "histogram-widget": {"coeff": "+1", "unit": "DAYS"}, "bucket-widget": {"coeff": "+1", "unit": "DAYS"}, "bar-widget": {"coeff": "+1", "unit": "DAYS"}, "facet-widget": {"coeff": "+30", "unit": "DAYS"}, }, "1YEARS": { "histogram-widget": {"coeff": "+3", "unit": "DAYS"}, "bucket-widget": {"coeff": "+3", "unit": "DAYS"}, "bar-widget": {"coeff": "+3", "unit": "DAYS"}, "facet-widget": {"coeff": "+12", "unit": "MONTHS"}, }, "2YEARS": { "histogram-widget": {"coeff": "+7", "unit": "DAYS"}, "bucket-widget": {"coeff": "+7", "unit": "DAYS"}, "bar-widget": {"coeff": "+7", "unit": "DAYS"}, "facet-widget": {"coeff": "+3", 
"unit": "MONTHS"}, }, "10YEARS": { "histogram-widget": {"coeff": "+1", "unit": "MONTHS"}, "bucket-widget": {"coeff": "+1", "unit": "MONTHS"}, "bar-widget": {"coeff": "+1", "unit": "MONTHS"}, "facet-widget": {"coeff": "+1", "unit": "YEARS"}, }, } time_field = collection["timeFilter"].get("field") if time_field and (collection["timeFilter"]["value"] != "all" or collection["timeFilter"]["type"] == "fixed"): # fqs overrides main time filter fq_time_ids = [fq["id"] for fq in query["fqs"] if fq["field"] == time_field] props["time_filter_overrides"] = fq_time_ids props["time_field"] = time_field if collection["timeFilter"]["type"] == "rolling": props["field"] = collection["timeFilter"]["field"] props["from"] = "NOW-%s" % collection["timeFilter"]["value"] props["to"] = "NOW" props["gap"] = GAPS.get(collection["timeFilter"]["value"]) elif collection["timeFilter"]["type"] == "fixed": props["field"] = collection["timeFilter"]["field"] props["from"] = collection["timeFilter"]["from"] props["to"] = collection["timeFilter"]["to"] props["fixed"] = True return props def _get_time_filter_query(self, timeFilter, facet): if "fixed" in timeFilter: props = {} stat_facet = {"min": timeFilter["from"], "max": timeFilter["to"]} _compute_range_facet(facet["widgetType"], stat_facet, props, stat_facet["min"], stat_facet["max"]) gap = props["gap"] unit = re.split("\d+", gap)[1] return { "start": "%(from)s/%(unit)s" % {"from": timeFilter["from"], "unit": unit}, "end": "%(to)s/%(unit)s" % {"to": timeFilter["to"], "unit": unit}, "gap": "%(gap)s" % props, # add a 'auto' } else: gap = timeFilter["gap"][facet["widgetType"]] return { "start": "%(from)s/%(unit)s" % {"from": timeFilter["from"], "unit": gap["unit"]}, "end": "%(to)s/%(unit)s" % {"to": timeFilter["to"], "unit": gap["unit"]}, "gap": "%(coeff)s%(unit)s/%(unit)s" % gap, # add a 'auto' } def _get_fq(self, collection, query): params = () timeFilter = {} if collection: timeFilter = self._get_range_borders(collection, query) if timeFilter and 
not timeFilter.get("time_filter_overrides"): params += (("fq", urllib.unquote(utf_quoter("%(field)s:[%(from)s TO %(to)s]" % timeFilter))),) # Merge facets queries on same fields grouped_fqs = groupby(query["fqs"], lambda x: (x["type"], x["field"])) merged_fqs = [] for key, group in grouped_fqs: field_fq = next(group) for fq in group: for f in fq["filter"]: field_fq["filter"].append(f) merged_fqs.append(field_fq) for fq in merged_fqs: if fq["type"] == "field": fields = fq["field"] if type(fq["field"]) == list else [fq["field"]] # 2D facets support for field in fields: f = [] for _filter in fq["filter"]: values = ( _filter["value"] if type(_filter["value"]) == list else [_filter["value"]] ) # 2D facets support if fields.index(field) < len(values): # Lowest common field denominator value = values[fields.index(field)] exclude = "-" if _filter["exclude"] else "" if value is not None and " " in force_unicode(value): value = force_unicode(value).replace('"', '\\"') f.append('%s%s:"%s"' % (exclude, field, value)) else: f.append("%s{!field f=%s}%s" % (exclude, field, value)) _params = "{!tag=%(id)s}" % fq + " ".join(f) params += (("fq", urllib.unquote(utf_quoter(_params))),) elif fq["type"] == "range": params += ( ( "fq", "{!tag=%(id)s}" % fq + " ".join( [ urllib.unquote( utf_quoter( "%s%s:[%s TO %s}" % ("-" if field["exclude"] else "", fq["field"], f["from"], f["to"]) ) ) for field, f in zip(fq["filter"], fq["properties"]) ] ), ), ) elif fq["type"] == "range-up": params += ( ( "fq", "{!tag=%(id)s}" % fq + " ".join( [ urllib.unquote( utf_quoter( "%s%s:[%s TO %s}" % ( "-" if field["exclude"] else "", fq["field"], f["from"] if fq["is_up"] else "*", "*" if fq["is_up"] else f["from"], ) ) ) for field, f in zip(fq["filter"], fq["properties"]) ] ), ), ) elif fq["type"] == "map": _keys = fq.copy() _keys.update(fq["properties"]) params += ( ( "fq", "{!tag=%(id)s}" % fq + urllib.unquote( utf_quoter( "%(lat)s:[%(lat_sw)s TO %(lat_ne)s} AND %(lon)s:[%(lon_sw)s TO %(lon_ne)s}" % _keys 
) ), ), ) return params def query(self, collection, query): solr_query = {} solr_query["collection"] = collection["name"] if query.get("download"): solr_query["rows"] = 1000 solr_query["start"] = 0 else: solr_query["rows"] = int(collection["template"]["rows"] or 10) solr_query["start"] = int(query["start"]) solr_query["rows"] = min(solr_query["rows"], 1000) solr_query["start"] = min(solr_query["start"], 10000) params = self._get_params() + ( ("q", self._get_q(query)), ("wt", "json"), ("rows", solr_query["rows"]), ("start", solr_query["start"]), ) if any(collection["facets"]): params += (("facet", "true"), ("facet.mincount", 0), ("facet.limit", 10)) json_facets = {} timeFilter = self._get_range_borders(collection, query) for facet in collection["facets"]: if facet["type"] == "query": params += (("facet.query", "%s" % facet["field"]),) elif facet["type"] == "range" or facet["type"] == "range-up": keys = { "id": "%(id)s" % facet, "field": facet["field"], "key": "%(field)s-%(id)s" % facet, "start": facet["properties"]["start"], "end": facet["properties"]["end"], "gap": facet["properties"]["gap"], "mincount": int(facet["properties"]["mincount"]), } if ( timeFilter and timeFilter["time_field"] == facet["field"] and ( facet["id"] not in timeFilter["time_filter_overrides"] or facet["widgetType"] != "histogram-widget" ) ): keys.update(self._get_time_filter_query(timeFilter, facet)) params += ( ( "facet.range", "{!key=%(key)s ex=%(id)s f.%(field)s.facet.range.start=%(start)s f.%(field)s.facet.range.end=%(end)s f.%(field)s.facet.range.gap=%(gap)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s" % keys, ), ) elif facet["type"] == "field": keys = { "id": "%(id)s" % facet, "field": facet["field"], "key": "%(field)s-%(id)s" % facet, "limit": int(facet["properties"].get("limit", 10)) + (1 if facet["widgetType"] == "facet-widget" else 0), "mincount": int(facet["properties"]["mincount"]), } params += ( ( "facet.field", "{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s 
f.%(field)s.facet.mincount=%(mincount)s}%(field)s" % keys, ), ) elif facet["type"] == "nested": _f = { "field": facet["field"], "limit": int(facet["properties"].get("limit", 10)) + (1 if facet["widgetType"] == "facet-widget" else 0), "mincount": int(facet["properties"]["mincount"]), } if "start" in facet["properties"]: _f.update( { "type": "range", "start": facet["properties"]["start"], "end": facet["properties"]["end"], "gap": facet["properties"]["gap"], } ) if ( timeFilter and timeFilter["time_field"] == facet["field"] and ( facet["id"] not in timeFilter["time_filter_overrides"] or facet["widgetType"] != "bucket-widget" ) ): _f.update(self._get_time_filter_query(timeFilter, facet)) else: _f.update({"type": "terms", "field": facet["field"], "excludeTags": facet["id"]}) if facet["properties"]["facets"]: if facet["properties"]["facets"][0]["aggregate"] == "count": _f["facet"] = { "d2": { "type": "terms", "field": "%(field)s" % facet["properties"]["facets"][0], "limit": int(facet["properties"]["facets"][0].get("limit", 10)), "mincount": int(facet["properties"]["facets"][0]["mincount"]), } } if len(facet["properties"]["facets"]) > 1: # Get 3rd dimension calculation _f["facet"]["d2"]["facet"] = { "d2": self._get_aggregate_function(facet["properties"]["facets"][1]) } else: _f["facet"] = {"d2": self._get_aggregate_function(facet["properties"]["facets"][0])} json_facets[facet["id"]] = _f elif facet["type"] == "function": json_facets[facet["id"]] = self._get_aggregate_function(facet) json_facets["processEmpty"] = True elif facet["type"] == "pivot": if facet["properties"]["facets"] or facet["widgetType"] == "map-widget": fields = facet["field"] fields_limits = [] for f in facet["properties"]["facets"]: fields_limits.append("f.%s.facet.limit=%s" % (f["field"], f["limit"])) fields_limits.append("f.%s.facet.mincount=%s" % (f["field"], f["mincount"])) fields += "," + f["field"] keys = { "id": "%(id)s" % facet, "key": "%(field)s-%(id)s" % facet, "field": facet["field"], 
"fields": fields, "limit": int(facet["properties"].get("limit", 10)), "mincount": int(facet["properties"]["mincount"]), "fields_limits": " ".join(fields_limits), } params += ( ( "facet.pivot", "{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s %(fields_limits)s}%(fields)s" % keys, ), ) if json_facets: params += (("json.facet", json.dumps(json_facets)),) params += self._get_fq(collection, query) if collection["template"]["fieldsSelected"] and collection["template"]["isGridLayout"]: fields = set( collection["template"]["fieldsSelected"] + [collection["idField"]] if collection["idField"] else [] ) # Add field if needed if collection["template"]["leafletmap"].get("latitudeField"): fields.add(collection["template"]["leafletmap"]["latitudeField"]) if collection["template"]["leafletmap"].get("longitudeField"): fields.add(collection["template"]["leafletmap"]["longitudeField"]) if collection["template"]["leafletmap"].get("labelField"): fields.add(collection["template"]["leafletmap"]["labelField"]) params += (("fl", urllib.unquote(utf_quoter(",".join(list(fields))))),) else: params += (("fl", "*"),) params += (("hl", "true"), ("hl.fl", "*"), ("hl.snippets", 5), ("hl.fragsize", 1000)) if collection["template"]["fieldsSelected"]: fields = [] for field in collection["template"]["fieldsSelected"]: attribute_field = filter( lambda attribute: field == attribute["name"], collection["template"]["fieldsAttributes"] ) if attribute_field: if attribute_field[0]["sort"]["direction"]: fields.append("%s %s" % (field, attribute_field[0]["sort"]["direction"])) if fields: params += (("sort", ",".join(fields)),) response = self._root.get("%(collection)s/select" % solr_query, params) return self._get_json(response) def suggest(self, collection, query): try: params = self._get_params() + ( ("suggest", "true"), ("suggest.build", "true"), ("suggest.q", query["q"]), ("wt", "json"), ) if query.get("dictionary"): params += (("suggest.dictionary", 
query["dictionary"]),) response = self._root.get("%s/suggest" % collection, params) return self._get_json(response) except RestException, e: raise PopupException(e, title=_("Error while accessing Solr"))
class HistoryServerApi(object):
  """
  REST client for the MapReduce JobHistory Server ('ws/<version>/history').

  The effective username is kept in thread-local storage so a single shared
  instance can impersonate the current request user (via the 'doAs' query
  parameter) without leaking identity across threads/requests.
  """

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(oozie_url, 'ws/%s/history' % _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    self._thread_local = threading.local() # To store user info
    if self._security_enabled:
      self._client.set_kerberos_auth()
    # Certificate verification is configured regardless of Kerberos.
    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "HistoryServerApi at %s" % (self._url, )

  def _get_params(self):
    # Common query parameters: impersonate the thread-local user when it
    # differs from the default user; without Kerberos, additionally identify
    # the request through 'user.name'.
    params = {}
    if self.username != DEFAULT_USER.get(): # We impersonate if needed
      params['doAs'] = self.username
      if not self._security_enabled:
        params['user.name'] = DEFAULT_USER.get()
    return params

  @property
  def url(self):
    return self._url

  @property
  def user(self):
    return self.username # Backward compatibility

  @property
  def username(self):
    # Falls back to the configured default user when no per-thread user was
    # set via setuser().
    try:
      return self._thread_local.user
    except AttributeError:
      return DEFAULT_USER.get()

  def setuser(self, user):
    """Set the thread-local effective user; returns the previous one."""
    curr = self.user
    self._thread_local.user = user
    return curr

  def job(self, user, job_id):
    # NOTE(review): 'user' is unused here; impersonation comes from the
    # thread-local username instead.
    return self._root.get('mapreduce/jobs/%(job_id)s' % {'job_id': job_id}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def counters(self, job_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/counters' % {'job_id': job_id}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def conf(self, job_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/conf' % {'job_id': job_id}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def job_attempts(self, job_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/jobattempts' % {'job_id': job_id}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def tasks(self, job_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/tasks' % {'job_id': job_id}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task(self, job_id, task_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s' % {
        'job_id': job_id, 'task_id': task_id
    }, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempts(self, job_id, task_id):
    return self._root.get(
        'mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts' % {
            'job_id': job_id, 'task_id': task_id
        }, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_counters(self, job_id, task_id):
    # Callers may pass a YARN application id; the history server expects the
    # corresponding MapReduce job id.
    job_id = job_id.replace('application', 'job')
    return self._root.get(
        'mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/counters' % {
            'job_id': job_id, 'task_id': task_id
        }, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempt(self, job_id, task_id, attempt_id):
    return self._root.get(
        'mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s' % {
            'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id
        }, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempt_counters(self, job_id, task_id, attempt_id):
    return self._root.get(
        'mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s/counters' % {
            'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id
        }, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})
class OozieApi(object):
  """
  Client for the Oozie Web Services REST API.

  `user` may be an object exposing a `username` attribute or a plain
  username string; requests are issued on behalf of that user via 'doAs'.
  """

  def __init__(self, oozie_url, user, security_enabled=False, api_version=API_VERSION, ssl_cert_ca_verify=True):
    self._url = posixpath.join(oozie_url, api_version)
    self._client = HttpClient(self._url, logger=LOG)
    if security_enabled:
      self._client.set_kerberos_auth()
    self._client.set_verify(ssl_cert_ca_verify)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    # To store username info
    if hasattr(user, 'username'):
      self.user = user.username
    else:
      self.user = user
    self.api_version = api_version

  def __str__(self):
    return "OozieApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def _get_params(self):
    # Common query parameters. With Kerberos the authenticated principal is
    # trusted, so only 'doAs' impersonation is needed; otherwise the default
    # user also identifies itself through 'user.name'.
    if self.security_enabled:
      return {
        'doAs': self.user,
        'timezone': TIME_ZONE.get()
      }
    return {
      'user.name': DEFAULT_USER,
      'doAs': self.user,
      'timezone': TIME_ZONE.get()
    }

  def _get_oozie_properties(self, properties=None):
    """Return the default job properties merged with `properties` (which wins)."""
    defaults = {
      'user.name': self.user,
    }
    if properties is not None:
      defaults.update(properties)
    return defaults

  VALID_JOB_FILTERS = ('name', 'user', 'group', 'status', 'startcreatedtime')

  def get_jobs(self, jobtype, offset=None, cnt=None, filters=None):
    """
    Get a list of Oozie jobs. Note that offset is 1-based.

    `filters` is an iterable of (key, value) pairs; each key must be in
    VALID_JOB_FILTERS. Raises ValueError on an invalid filter key.
    """
    params = self._get_params()
    if offset is not None:
      params['offset'] = str(offset)
    if cnt is not None:
      params['len'] = str(cnt)
    if filters is None:
      filters = []
    params['jobtype'] = jobtype

    filter_list = []
    for key, val in filters:
      if key not in OozieApi.VALID_JOB_FILTERS:
        raise ValueError('"%s" is not a valid filter for selecting jobs' % (key,))
      filter_list.append('%s=%s' % (key, val))
    params['filter'] = ';'.join(filter_list)

    # Send the request
    resp = self._root.get('jobs', params)
    if jobtype == 'wf':
      wf_list = WorkflowList(self, resp, filters=filters)
    elif jobtype == 'coord':
      wf_list = CoordinatorList(self, resp, filters=filters)
    else:
      wf_list = BundleList(self, resp, filters=filters)
    return wf_list

  def get_workflows(self, offset=None, cnt=None, filters=None):
    return self.get_jobs('wf', offset, cnt, filters)

  def get_coordinators(self, offset=None, cnt=None, filters=None):
    return self.get_jobs('coord', offset, cnt, filters)

  def get_bundles(self, offset=None, cnt=None, filters=None):
    return self.get_jobs('bundle', offset, cnt, filters)

  # TODO: make get_job accept any jobid
  def get_job(self, jobid):
    """
    get_job(jobid) -> Workflow
    """
    params = self._get_params()
    resp = self._root.get('job/%s' % (jobid,), params)
    wf = Workflow(self, resp)
    return wf

  def get_coordinator(self, jobid, offset=None, cnt=None, filters=None):
    """Fetch one coordinator with its actions, most recent first."""
    params = self._get_params()
    if offset is not None:
      params['offset'] = str(offset)
    if cnt is not None:
      params['len'] = str(cnt)
    if filters is None:
      filters = {}
    params.update({'order': 'desc'})

    # NOTE(review): iterating a dict here yields bare keys, so non-empty dict
    # filters would fail to unpack — callers presumably pass (key, value)
    # pairs or nothing; behavior preserved.
    filter_list = []
    for key, val in filters:
      if key not in OozieApi.VALID_JOB_FILTERS:
        raise ValueError('"%s" is not a valid filter for selecting jobs' % (key,))
      filter_list.append('%s=%s' % (key, val))
    params['filter'] = ';'.join(filter_list)

    resp = self._root.get('job/%s' % (jobid,), params)
    return Coordinator(self, resp)

  def get_bundle(self, jobid):
    params = self._get_params()
    resp = self._root.get('job/%s' % (jobid,), params)
    return Bundle(self, resp)

  def get_job_definition(self, jobid):
    """
    get_job_definition(jobid) -> Definition (xml string)
    """
    params = self._get_params()
    params['show'] = 'definition'
    xml = self._root.get('job/%s' % (jobid,), params)
    return xml

  def get_job_log(self, jobid):
    """
    get_job_log(jobid) -> Log (xml string)
    """
    params = self._get_params()
    params['show'] = 'log'
    xml = self._root.get('job/%s' % (jobid,), params)
    return xml

  def get_action(self, action_id):
    """Fetch a single action; the id's marker ('C@'/'B@') picks the type."""
    if 'C@' in action_id:
      Klass = CoordinatorAction
    elif 'B@' in action_id:
      Klass = BundleAction
    else:
      Klass = WorkflowAction
    params = self._get_params()
    resp = self._root.get('job/%s' % (action_id,), params)
    return Klass(resp)

  def job_control(self, jobid, action, properties=None, parameters=None):
    """
    job_control(jobid, action) -> None
    Raise RestException on error.
    """
    if action not in ('start', 'suspend', 'resume', 'kill', 'rerun', 'coord-rerun', 'bundle-rerun', 'change'):
      msg = 'Invalid oozie job action: %s' % (action,)
      LOG.error(msg)
      raise ValueError(msg)
    properties = self._get_oozie_properties(properties)
    params = self._get_params()
    params['action'] = action
    if parameters is not None:
      params.update(parameters)
    return self._root.put('job/%s' % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def submit_workflow(self, application_path, properties=None):
    """
    submit_workflow(application_path, properties=None) -> jobid
    Raise RestException on error.
    """
    defaults = {
      'oozie.wf.application.path': application_path,
      'user.name': self.user,
    }
    if properties is not None:
      defaults.update(properties)
    properties = defaults
    return self.submit_job(properties)

  # Is name actually submit_coord?
  def submit_job(self, properties=None):
    """
    submit_job(properties=None, id=None) -> jobid
    Raise RestException on error.
    """
    defaults = {
      'user.name': self.user,
    }
    if properties is not None:
      defaults.update(properties)
    properties = defaults
    params = self._get_params()
    resp = self._root.post('jobs', params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)
    return resp['id']

  def rerun(self, jobid, properties=None, params=None):
    properties = self._get_oozie_properties(properties)
    if params is None:
      params = self._get_params()
    else:
      # BUG FIX: the original did `self._get_params().update(params)`, which
      # updated a temporary dict and threw it away, so the default
      # doAs/user.name/timezone parameters were silently dropped whenever the
      # caller supplied params. Merge the caller's params over the defaults.
      merged = self._get_params()
      merged.update(params)
      params = merged
    params['action'] = 'rerun'
    return self._root.put('job/%s' % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def get_build_version(self):
    """
    get_build_version() -> Build version (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/build-version', params)
    return resp

  def get_instrumentation(self):
    params = self._get_params()
    resp = self._root.get('admin/instrumentation', params)
    return resp

  def get_metrics(self):
    params = self._get_params()
    resp = self._root.get('admin/metrics', params)
    return resp

  def get_configuration(self):
    """
    get_configuration() -> Oozie config (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/configuration', params)
    return resp

  def get_oozie_status(self):
    """
    get_oozie_status() -> Oozie status (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/status', params)
    return resp

  def get_oozie_slas(self, **kwargs):
    """
    filter=
      app_name=my-sla-app
      id=0000002-131206135002457-oozie-oozi-W
      nominal_start=2013-06-18T00:01Z
      nominal_end=2013-06-23T00:01Z
    """
    params = self._get_params()
    params['filter'] = ';'.join(['%s=%s' % (key, val) for key, val in kwargs.iteritems()])
    resp = self._root.get('sla', params)
    return resp['slaSummaryList']
class DataWarehouse2Api(object):
  """
  Client for a Kubernetes-backed Data Warehouse cluster service ('<api>/dw').

  Responses from 'listClusters'/'describeCluster' are normalized in place to
  the field names the UI expects (clusterName, workersGroupSize, ...).
  """

  def __init__(self, user=None):
    self._api_url = '%s/dw' % K8S.API_URL.get().rstrip('/')
    self.user = user
    self._client = HttpClient(self._api_url, logger=LOG)
    # NOTE(review): certificate verification is hard-disabled here — confirm
    # this is acceptable for the target deployment.
    self._client.set_verify(False)
    self._root = Resource(self._client)

  def _decorate_cluster(self, cluster, instance_type):
    # Normalize one raw cluster dict in place to the UI field names.
    # Shared by list_k8_clusters() and list_clusters(), which previously
    # duplicated this loop body.
    cluster['clusterName'] = cluster['name']
    cluster['workersGroupSize'] = cluster['workerReplicas']
    cluster['instanceType'] = instance_type
    cluster['progress'] = '%(workerReplicasOnline)s / %(workerReplicas)s' % cluster
    # The backend does not report a creation time; stamp "now" as a placeholder.
    cluster['creationDate'] = str(datetime.now())

  def list_k8_clusters(self):
    """List clusters, reporting each one's CPU/memory as its instance type."""
    clusters = self._root.post('listClusters', contenttype="application/json")
    for cluster in clusters['clusters']:
      self._decorate_cluster(cluster, '%(workerCpuCores)s CPU %(workerMemoryInGib)s Memory' % cluster)
    return clusters

  def create_cluster(self, cloud_provider, cluster_name, cdh_version, public_key, instance_type, environment_name,
                     workers_group_size=3, namespace_name=None,
                     cloudera_manager_username='******', cloudera_manager_password='******'):
    """
    Create a cluster.

    Only cluster_name, cdh_version and workers_group_size are forwarded; the
    other parameters are kept for interface compatibility with sibling APIs
    and are ignored by this backend.
    """
    data = {
      'clusterName': cluster_name,
      'cdhVersion': cdh_version or 'CDH6.3',  # default version when unspecified
      'workerCpuCores': 1,
      'workerMemoryInGib': 1,
      'workerReplicas': workers_group_size,
      'workerAutoResize': False
    }
    return self._root.post('createCluster', data=json.dumps(data), contenttype="application/json")

  def list_clusters(self):
    """List clusters, labelling every one with the generic 'Data Warehouse' type."""
    clusters = self._root.post('listClusters', contenttype="application/json")
    for cluster in clusters['clusters']:
      self._decorate_cluster(cluster, 'Data Warehouse')
    return clusters

  def delete_cluster(self, cluster_id):
    data = json.dumps({'clusterName': cluster_id})
    return {
      'result': self._root.post('deleteCluster', data=data, contenttype="application/json")
    }

  def describe_cluster(self, cluster_id):
    request = json.dumps({'clusterName': cluster_id})
    data = self._root.post('describeCluster', data=request, contenttype="application/json")
    data['cluster']['clusterName'] = data['cluster']['name']
    data['cluster']['cdhVersion'] = 'Data Warehouse'
    return data

  def update_cluster(self, **params):
    return self._root.post('updateCluster', data=json.dumps(params), contenttype="application/json")
class HistoryServerApi(object):
  """
  Client for the MapReduce JobHistory Server REST API ('ws/<version>/history').

  Keeps the effective username in thread-local storage so one shared instance
  can impersonate the per-request user without cross-thread leakage.
  """

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(oozie_url, 'ws/%s/history' % _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    self._thread_local = threading.local() # To store user info
    if self._security_enabled:
      self._client.set_kerberos_auth()
    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "HistoryServerApi at %s" % (self._url,)

  def _get_params(self):
    # Requests made as the default user need no extra parameters.
    caller = self.username
    default_user = DEFAULT_USER.get()
    if caller == default_user:
      return {}
    # Impersonate the thread-local user; without Kerberos, also identify the
    # request itself via 'user.name'.
    params = {'doAs': caller}
    if not self._security_enabled:
      params['user.name'] = default_user
    return params

  @property
  def url(self):
    return self._url

  @property
  def user(self):
    return self.username # Backward compatibility

  @property
  def username(self):
    try:
      return self._thread_local.user
    except AttributeError:
      return DEFAULT_USER.get()

  def setuser(self, user):
    """Swap in a new thread-local user, returning the one it replaced."""
    previous = self.user
    self._thread_local.user = user
    return previous

  def _get_resource(self, path_template, substitutions):
    # All endpoints share the same GET shape: substituted path, common
    # impersonation params, JSON Accept header.
    return self._root.get(path_template % substitutions,
                          params=self._get_params(),
                          headers={'Accept': _JSON_CONTENT_TYPE})

  def job(self, user, job_id):
    return self._get_resource('mapreduce/jobs/%(job_id)s', {'job_id': job_id})

  def counters(self, job_id):
    return self._get_resource('mapreduce/jobs/%(job_id)s/counters', {'job_id': job_id})

  def conf(self, job_id):
    return self._get_resource('mapreduce/jobs/%(job_id)s/conf', {'job_id': job_id})

  def job_attempts(self, job_id):
    return self._get_resource('mapreduce/jobs/%(job_id)s/jobattempts', {'job_id': job_id})

  def tasks(self, job_id):
    return self._get_resource('mapreduce/jobs/%(job_id)s/tasks', {'job_id': job_id})

  def task(self, job_id, task_id):
    return self._get_resource('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s',
                              {'job_id': job_id, 'task_id': task_id})

  def task_attempts(self, job_id, task_id):
    return self._get_resource('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts',
                              {'job_id': job_id, 'task_id': task_id})

  def task_counters(self, job_id, task_id):
    # A YARN application id maps to the MR job id by swapping the prefix.
    return self._get_resource('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/counters',
                              {'job_id': job_id.replace('application', 'job'), 'task_id': task_id})

  def task_attempt(self, job_id, task_id, attempt_id):
    return self._get_resource('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s',
                              {'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id})

  def task_attempt_counters(self, job_id, task_id, attempt_id):
    return self._get_resource('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s/counters',
                              {'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id})
class OozieApi(object):
  """
  Client for the Oozie Web Services REST API (variant with log filters,
  dry-run submission and 'ignore'/'update' job actions).

  `user` may be an object exposing a `username` attribute or a plain
  username string; requests are issued on behalf of that user via 'doAs'.
  """

  def __init__(self, oozie_url, user, security_enabled=False, api_version=API_VERSION, ssl_cert_ca_verify=True):
    self._url = posixpath.join(oozie_url, api_version)
    self._client = HttpClient(self._url, logger=LOG)
    if security_enabled:
      self._client.set_kerberos_auth()
    self._client.set_verify(ssl_cert_ca_verify)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    # To store username info
    if hasattr(user, "username"):
      self.user = user.username
    else:
      self.user = user
    self.api_version = api_version

  def __str__(self):
    return "OozieApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def _get_params(self):
    # With Kerberos only 'doAs' impersonation is needed; otherwise the
    # default user also identifies itself through 'user.name'.
    if self.security_enabled:
      return {"doAs": self.user, "timezone": TIME_ZONE.get()}
    return {"user.name": DEFAULT_USER, "doAs": self.user, "timezone": TIME_ZONE.get()}

  def _get_oozie_properties(self, properties=None):
    """Return the default job properties merged with `properties` (which wins)."""
    defaults = {"user.name": self.user}
    if properties is not None:
      defaults.update(properties)
    return defaults

  VALID_JOB_FILTERS = ("name", "user", "group", "status", "startcreatedtime", "text")
  VALID_LOG_FILTERS = set(("recent", "limit", "loglevel", "text"))

  def get_jobs(self, jobtype, offset=None, cnt=None, filters=None):
    """
    Get a list of Oozie jobs. Note that offset is 1-based.

    `filters` is an iterable of (key, value) pairs; each key must be in
    VALID_JOB_FILTERS. Raises ValueError on an invalid filter key.
    """
    params = self._get_params()
    if offset is not None:
      params["offset"] = str(offset)
    if cnt is not None:
      params["len"] = str(cnt)
    if filters is None:
      filters = []
    params["jobtype"] = jobtype

    filter_list = []
    for key, val in filters:
      if key not in OozieApi.VALID_JOB_FILTERS:
        raise ValueError('"%s" is not a valid filter for selecting jobs' % (key,))
      filter_list.append("%s=%s" % (key, val))
    params["filter"] = ";".join(filter_list)

    # Send the request
    resp = self._root.get("jobs", params)
    if jobtype == "wf":
      wf_list = WorkflowList(self, resp, filters=filters)
    elif jobtype == "coord":
      wf_list = CoordinatorList(self, resp, filters=filters)
    else:
      wf_list = BundleList(self, resp, filters=filters)
    return wf_list

  def get_workflows(self, offset=None, cnt=None, filters=None):
    return self.get_jobs("wf", offset, cnt, filters)

  def get_coordinators(self, offset=None, cnt=None, filters=None):
    return self.get_jobs("coord", offset, cnt, filters)

  def get_bundles(self, offset=None, cnt=None, filters=None):
    return self.get_jobs("bundle", offset, cnt, filters)

  # TODO: make get_job accept any jobid
  def get_job(self, jobid):
    """
    get_job(jobid) -> Workflow
    """
    params = self._get_params()
    resp = self._root.get("job/%s" % (jobid,), params)
    wf = Workflow(self, resp)
    return wf

  def get_coordinator(self, jobid, offset=None, cnt=None, filters=None):
    """Fetch one coordinator with its actions, most recent first."""
    params = self._get_params()
    if offset is not None:
      params["offset"] = str(offset)
    if cnt is not None:
      params["len"] = str(cnt)
    if filters is None:
      filters = {}
    params.update({"order": "desc"})

    # NOTE(review): iterating a dict here yields bare keys, so non-empty dict
    # filters would fail to unpack — callers presumably pass (key, value)
    # pairs or nothing; behavior preserved.
    filter_list = []
    for key, val in filters:
      if key not in OozieApi.VALID_JOB_FILTERS:
        raise ValueError('"%s" is not a valid filter for selecting jobs' % (key,))
      filter_list.append("%s=%s" % (key, val))
    params["filter"] = ";".join(filter_list)

    resp = self._root.get("job/%s" % (jobid,), params)
    return Coordinator(self, resp)

  def get_bundle(self, jobid):
    params = self._get_params()
    resp = self._root.get("job/%s" % (jobid,), params)
    return Bundle(self, resp)

  def get_job_definition(self, jobid):
    """
    get_job_definition(jobid) -> Definition (xml string)
    """
    params = self._get_params()
    params["show"] = "definition"
    return self._root.get("job/%s" % (jobid,), params)

  def get_job_log(self, jobid, logfilter=None):
    """
    get_job_log(jobid) -> Log (xml string)

    `logfilter` is an iterable of (key, value) pairs; each key must be in
    VALID_LOG_FILTERS. Raises ValueError on an invalid filter key.
    """
    params = self._get_params()
    params["show"] = "log"

    filter_list = []
    if logfilter is None:
      logfilter = []
    for key, val in logfilter:
      if key not in OozieApi.VALID_LOG_FILTERS:
        raise ValueError('"%s" is not a valid filter for job logs' % (key,))
      filter_list.append("%s=%s" % (key, val))
    params["logfilter"] = ";".join(filter_list)
    return self._root.get("job/%s" % (jobid,), params)

  def get_job_status(self, jobid):
    """Return just the job's status (lighter than fetching the whole job)."""
    params = self._get_params()
    params["show"] = "status"
    xml = self._root.get("job/%s" % (jobid,), params)
    return xml

  def get_action(self, action_id):
    """Fetch a single action; the id's marker ('C@'/'B@') picks the type."""
    if "C@" in action_id:
      Klass = CoordinatorAction
    elif "B@" in action_id:
      Klass = BundleAction
    else:
      Klass = WorkflowAction
    params = self._get_params()
    resp = self._root.get("job/%s" % (action_id,), params)
    return Klass(resp)

  def job_control(self, jobid, action, properties=None, parameters=None):
    """
    job_control(jobid, action) -> None
    Raise RestException on error.
    """
    if action not in (
      "start",
      "suspend",
      "resume",
      "kill",
      "rerun",
      "coord-rerun",
      "bundle-rerun",
      "change",
      "ignore",
      "update",
    ):
      msg = "Invalid oozie job action: %s" % (action,)
      LOG.error(msg)
      raise ValueError(msg)
    properties = self._get_oozie_properties(properties)
    params = self._get_params()
    params["action"] = action
    if parameters is not None:
      params.update(parameters)
    return self._root.put("job/%s" % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def submit_workflow(self, application_path, properties=None):
    """
    submit_workflow(application_path, properties=None) -> jobid
    Raise RestException on error.
    """
    defaults = {"oozie.wf.application.path": application_path, "user.name": self.user}
    if properties is not None:
      defaults.update(properties)
    properties = defaults
    return self.submit_job(properties)

  # Is name actually submit_coord?
  def submit_job(self, properties=None):
    """
    submit_job(properties=None, id=None) -> jobid
    Raise RestException on error.
    """
    defaults = {"user.name": self.user}
    if properties is not None:
      defaults.update(properties)
    properties = defaults
    params = self._get_params()
    resp = self._root.post("jobs", params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)
    return resp["id"]

  def dryrun(self, properties=None):
    """Validate a job submission server-side without actually starting it."""
    defaults = {"user.name": self.user}
    if properties is not None:
      defaults.update(properties)
    properties = defaults
    params = self._get_params()
    params["action"] = "dryrun"
    return self._root.post("jobs", params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def rerun(self, jobid, properties=None, params=None):
    properties = self._get_oozie_properties(properties)
    if params is None:
      params = self._get_params()
    else:
      # BUG FIX: the original did `self._get_params().update(params)`, which
      # updated a temporary dict and threw it away, so the default
      # doAs/user.name/timezone parameters were silently dropped whenever the
      # caller supplied params. Merge the caller's params over the defaults.
      merged = self._get_params()
      merged.update(params)
      params = merged
    params["action"] = "rerun"
    return self._root.put("job/%s" % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def get_build_version(self):
    """
    get_build_version() -> Build version (dictionary)
    """
    params = self._get_params()
    resp = self._root.get("admin/build-version", params)
    return resp

  def get_instrumentation(self):
    params = self._get_params()
    resp = self._root.get("admin/instrumentation", params)
    return resp

  def get_metrics(self):
    params = self._get_params()
    resp = self._root.get("admin/metrics", params)
    return resp

  def get_configuration(self):
    """
    get_configuration() -> Oozie config (dictionary)
    """
    params = self._get_params()
    resp = self._root.get("admin/configuration", params)
    return resp

  def get_oozie_status(self):
    """
    get_oozie_status() -> Oozie status (dictionary)
    """
    params = self._get_params()
    resp = self._root.get("admin/status", params)
    return resp

  def get_oozie_slas(self, **kwargs):
    """
    filter=
      app_name=my-sla-app
      id=0000002-131206135002457-oozie-oozi-W
      nominal_start=2013-06-18T00:01Z
      nominal_end=2013-06-23T00:01Z
    """
    params = self._get_params()
    params["filter"] = ";".join(["%s=%s" % (key, val) for key, val in kwargs.iteritems()])
    resp = self._root.get("sla", params)
    return resp["slaSummaryList"]
class HistoryServerApi(object):
  """
  Minimal REST client for the MapReduce JobHistory Server
  ('ws/<version>/history').

  Unlike the impersonating variant, this one sends no 'doAs'/'user.name'
  parameters — requests go out as the process identity only.
  """

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(oozie_url, 'ws/%s/history' % _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    if self._security_enabled:
      self._client.set_kerberos_auth()
    # Certificate verification is configured regardless of Kerberos.
    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "HistoryServerApi at %s" % (self._url, )

  @property
  def url(self):
    return self._url

  def job(self, user, job_id):
    # NOTE(review): 'user' is unused in this variant.
    return self._root.get('mapreduce/jobs/%(job_id)s' % {'job_id': job_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def counters(self, job_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/counters' % {'job_id': job_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def conf(self, job_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/conf' % {'job_id': job_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def job_attempts(self, job_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/jobattempts' % {'job_id': job_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def tasks(self, job_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/tasks' % {'job_id': job_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def task(self, job_id, task_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s' % {
        'job_id': job_id, 'task_id': task_id
    }, headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempts(self, job_id, task_id):
    return self._root.get(
        'mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts' % {
            'job_id': job_id, 'task_id': task_id
        }, headers={'Accept': _JSON_CONTENT_TYPE})

  def task_counters(self, job_id, task_id):
    # Callers may pass a YARN application id; the history server expects the
    # corresponding MapReduce job id.
    job_id = job_id.replace('application', 'job')
    return self._root.get(
        'mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/counters' % {
            'job_id': job_id, 'task_id': task_id
        }, headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempt(self, job_id, task_id, attempt_id):
    return self._root.get(
        'mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s' % {
            'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id
        }, headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempt_counters(self, job_id, task_id, attempt_id):
    return self._root.get(
        'mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s/counters' % {
            'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id
        }, headers={'Accept': _JSON_CONTENT_TYPE})
class ResourceManagerApi(object):
  """
  Client for the YARN ResourceManager REST API ('ws/<version>').

  The effective username is kept in thread-local storage so a shared
  instance can impersonate the current request user via 'doAs'.
  """

  def __init__(self, rm_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(rm_url, 'ws', _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    self._thread_local = threading.local() # To store user info
    if self._security_enabled:
      self._client.set_kerberos_auth()
    self._client.set_verify(ssl_cert_ca_verify)

  def _get_params(self):
    # Impersonate the thread-local user when it differs from the default;
    # without Kerberos, also identify the request itself via 'user.name'.
    params = {}
    if self.username != DEFAULT_USER.get(): # We impersonate if needed
      params['doAs'] = self.username
      if not self.security_enabled:
        params['user.name'] = DEFAULT_USER.get()
    return params

  def __str__(self):
    return "ResourceManagerApi at %s" % (self._url,)

  def setuser(self, user):
    """Set the thread-local effective user; returns the previous one."""
    curr = self.user
    self._thread_local.user = user
    return curr

  @property
  def user(self):
    return self.username # Backward compatibility

  @property
  def username(self):
    # Falls back to the configured default user when setuser() was never
    # called on this thread.
    try:
      return self._thread_local.user
    except AttributeError:
      return DEFAULT_USER.get()

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def cluster(self, **kwargs):
    params = self._get_params()
    return self._execute(self._root.get, 'cluster/info', params=params, headers={'Accept': _JSON_CONTENT_TYPE})

  def apps(self, **kwargs):
    params = self._get_params()
    params.update(kwargs)
    return self._execute(self._root.get, 'cluster/apps', params=params, headers={'Accept': _JSON_CONTENT_TYPE})

  def app(self, app_id):
    params = self._get_params()
    return self._execute(self._root.get, 'cluster/apps/%(app_id)s' % {'app_id': app_id}, params=params, headers={'Accept': _JSON_CONTENT_TYPE})

  def kill(self, app_id):
    """Kill an application by PUT-ting state KILLED."""
    data = {'state': 'KILLED'}
    token = None

    # Tokens are managed within the kill method but should be moved out when
    # not alpha anymore or we support submitting an app. The `and False`
    # deliberately disables the delegation-token flow for now.
    if self.security_enabled and False:
      full_token = self.delegation_token()
      if 'token' not in full_token:
        raise PopupException(_('YARN did not return any token field.'), detail=smart_str(full_token))
      data['X-Hadoop-Delegation-Token'] = token = full_token.pop('token')
      LOG.debug('Received delegation token %s' % full_token)

    try:
      params = self._get_params()
      return self._execute(self._root.put, 'cluster/apps/%(app_id)s/state' % {'app_id': app_id}, params=params, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE)
    finally:
      # Always release a token we acquired, even if the kill request failed.
      if token:
        self.cancel_token(token)

  def delegation_token(self):
    params = self._get_params()
    data = {'renewer': self.username}
    return self._execute(self._root.post, 'cluster/delegation-token', params=params, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE)

  def cancel_token(self, token):
    params = self._get_params()
    headers = {'Hadoop-YARN-RM-Delegation-Token': token}
    # BUG FIX: the original format string had no '%s' placeholder
    # ('Canceling delegation token of ' % self.username), which made the '%'
    # operator raise TypeError and broke cancel_token entirely.
    LOG.debug('Canceling delegation token of %s' % self.username)
    return self._execute(self._root.delete, 'cluster/delegation-token', params=params, headers=headers)

  def _execute(self, function, *args, **kwargs):
    response = function(*args, **kwargs)

    # YARN-2605: Yarn does not use proper HTTP redirects when the standby RM has
    # failed back to the master RM; it answers with this marker body instead.
    if isinstance(response, str) and response.startswith('This is standby RM. Redirecting to the current active RM'):
      raise YarnFailoverOccurred(response)

    return response
class SolrApi(object): """ http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler """ def __init__(self, solr_url, user, security_enabled=SECURITY_ENABLED.get() if search_enabled() else SECURITY_ENABLED.default, ssl_cert_ca_verify=SSL_CERT_CA_VERIFY.get()): self._url = solr_url self._user = user self._client = HttpClient(self._url, logger=LOG) self.security_enabled = security_enabled if self.security_enabled: self._client.set_kerberos_auth() self._client.set_verify(ssl_cert_ca_verify) self._root = resource.Resource(self._client) # The Kerberos handshake requires two requests in order to authenticate, # but if our first request is a PUT/POST, it might flat-out reject the # first request if the body is too large. So, connect here in order to get # a cookie so future PUT/POSTs will be pre-authenticated. if self.security_enabled: self._root.invoke('HEAD', '/') def query(self, collection, query): solr_query = {} solr_query['collection'] = collection['name'] if query.get('download'): solr_query['rows'] = 1000 solr_query['start'] = 0 else: solr_query['rows'] = int(collection['template']['rows'] or 10) solr_query['start'] = int(query['start']) solr_query['rows'] = min(solr_query['rows'], 1000) solr_query['start'] = min(solr_query['start'], 10000) params = self._get_params() + ( ('q', self._get_q(query)), ('wt', 'json'), ('rows', solr_query['rows']), ('start', solr_query['start']), ) if any(collection['facets']): params += ( ('facet', 'true'), ('facet.mincount', 0), ('facet.limit', 10), ) json_facets = {} timeFilter = self._get_range_borders(collection, query) for facet in collection['facets']: if facet['type'] == 'query': params += (('facet.query', '%s' % facet['field']), ) elif facet['type'] == 'range' or facet['type'] == 'range-up': keys = { 'id': '%(id)s' % facet, 'field': facet['field'], 'key': '%(field)s-%(id)s' % facet, 'start': facet['properties']['start'], 'end': facet['properties']['end'], 'gap': facet['properties']['gap'], 'mincount': 
int(facet['properties']['mincount']) } if timeFilter and timeFilter['time_field'] == facet[ 'field'] and ( facet['id'] not in timeFilter['time_filter_overrides'] or facet['widgetType'] != 'histogram-widget'): keys.update( self._get_time_filter_query(timeFilter, facet)) params += (( 'facet.range', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.range.start=%(start)s f.%(field)s.facet.range.end=%(end)s f.%(field)s.facet.range.gap=%(gap)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys), ) elif facet['type'] == 'field': keys = { 'id': '%(id)s' % facet, 'field': facet['field'], 'key': '%(field)s-%(id)s' % facet, 'limit': int(facet['properties'].get('limit', 10)) + (1 if facet['widgetType'] == 'facet-widget' else 0), 'mincount': int(facet['properties']['mincount']) } params += (( 'facet.field', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys), ) elif facet['type'] == 'nested': _f = { 'field': facet['field'], 'limit': int(facet['properties'].get('limit', 10)) + (1 if facet['widgetType'] == 'text-facet-widget' else 0), 'mincount': int(facet['properties']['mincount']), 'sort': { 'count': facet['properties']['sort'] }, } print facet if facet['properties']['domain'].get( 'blockParent' ) or facet['properties']['domain'].get('blockChildren'): _f['domain'] = {} if facet['properties']['domain'].get('blockParent'): _f['domain']['blockParent'] = ' OR '.join( facet['properties']['domain']['blockParent']) if facet['properties']['domain'].get('blockChildren'): _f['domain']['blockChildren'] = ' OR '.join( facet['properties']['domain']['blockChildren']) if 'start' in facet['properties'] and not facet[ 'properties'].get('type') == 'field': _f.update({ 'type': 'range', 'start': facet['properties']['start'], 'end': facet['properties']['end'], 'gap': facet['properties']['gap'], }) if timeFilter and timeFilter['time_field'] == facet[ 'field'] and ( facet['id'] not in timeFilter['time_filter_overrides'] or 
facet['widgetType'] != 'bucket-widget'): _f.update( self._get_time_filter_query(timeFilter, facet)) else: _f.update({ 'type': 'terms', 'field': facet['field'], 'excludeTags': facet['id'], 'offset': 0, 'numBuckets': True, 'allBuckets': True, 'prefix': '' }) if facet['properties']['canRange'] and not facet[ 'properties']['isDate']: del _f['mincount'] # Numeric fields do not support if facet['properties']['facets']: self._n_facet_dimension(facet, _f, facet['properties']['facets'], 1) if facet['widgetType'] == 'text-facet-widget': _fname = _f['facet'].keys()[0] _f['sort'] = {_fname: facet['properties']['sort']} # domain = '-d2:NaN' # Solr 6.4 json_facets[facet['id']] = _f elif facet['type'] == 'function': json_facets[facet['id']] = self._get_aggregate_function( facet) json_facets['processEmpty'] = True elif facet['type'] == 'pivot': if facet['properties']['facets'] or facet[ 'widgetType'] == 'map-widget': fields = facet['field'] fields_limits = [] for f in facet['properties']['facets']: fields_limits.append('f.%s.facet.limit=%s' % (f['field'], f['limit'])) fields_limits.append('f.%s.facet.mincount=%s' % (f['field'], f['mincount'])) fields += ',' + f['field'] keys = { 'id': '%(id)s' % facet, 'key': '%(field)s-%(id)s' % facet, 'field': facet['field'], 'fields': fields, 'limit': int(facet['properties'].get('limit', 10)), 'mincount': int(facet['properties']['mincount']), 'fields_limits': ' '.join(fields_limits) } params += (( 'facet.pivot', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s %(fields_limits)s}%(fields)s' % keys), ) if json_facets: params += (('json.facet', json.dumps(json_facets)), ) params += self._get_fq(collection, query) if collection['template']['fieldsSelected'] and collection['template'][ 'isGridLayout']: fields = set( collection['template']['fieldsSelected'] + [collection['idField']] if collection['idField'] else []) # Add field if needed if collection['template']['leafletmap'].get('latitudeField'): 
fields.add( collection['template']['leafletmap']['latitudeField']) if collection['template']['leafletmap'].get('longitudeField'): fields.add( collection['template']['leafletmap']['longitudeField']) if collection['template']['leafletmap'].get('labelField'): fields.add(collection['template']['leafletmap']['labelField']) fl = urllib.unquote(utf_quoter(','.join(list(fields)))) else: fl = '*' nested_fields = self._get_nested_fields(collection) if nested_fields: fl += urllib.unquote( utf_quoter(',[child parentFilter="%s"]' % ' OR '.join(nested_fields))) params += (('fl', fl), ) params += ( ('hl', 'true'), ('hl.fl', '*'), ('hl.snippets', 5), ('hl.fragsize', 1000), ) if collection['template']['fieldsSelected']: fields = [] for field in collection['template']['fieldsSelected']: attribute_field = filter( lambda attribute: field == attribute['name'], collection['template']['fieldsAttributes']) if attribute_field: if attribute_field[0]['sort']['direction']: fields.append( '%s %s' % (field, attribute_field[0]['sort']['direction'])) if fields: params += (('sort', ','.join(fields)), ) response = self._root.get('%(collection)s/select' % solr_query, params) return self._get_json(response) def _n_facet_dimension(self, widget, _f, facets, dim): facet = facets[0] f_name = 'dim_%02d:%s' % (dim, facet['field']) if facet['aggregate']['function'] == 'count': if 'facet' not in _f: _f['facet'] = {f_name: {}} else: _f['facet'][f_name] = {} _f = _f['facet'] _f[f_name] = { 'type': 'terms', 'field': '%(field)s' % facet, 'limit': int(facet.get('limit', 10)), 'mincount': int(facet['mincount']), 'numBuckets': True, 'allBuckets': True, 'prefix': '' } if widget['widgetType'] == 'tree2-widget' and facets[-1][ 'aggregate']['function'] != 'count': _f['subcount'] = self._get_aggregate_function(facets[-1]) if len(facets) > 1: # Get n+1 dimension if facets[1]['aggregate']['function'] == 'count': self._n_facet_dimension(widget, _f[f_name], facets[1:], dim + 1) else: self._n_facet_dimension(widget, 
_f[f_name], facets[1:], dim) else: agg_function = self._get_aggregate_function(facet) _f['facet'] = { 'agg_%02d_00:%s' % (dim, agg_function): agg_function } for i, _f_agg in enumerate(facets[1:], 1): if _f_agg['aggregate']['function'] != 'count': agg_function = self._get_aggregate_function(_f_agg) _f['facet']['agg_%02d_%02d:%s' % (dim, i, agg_function)] = agg_function else: self._n_facet_dimension(widget, _f, facets[i:], dim + 1) # Get n+1 dimension break def suggest(self, collection, query): try: params = self._get_params() + ( ('suggest', 'true'), ('suggest.build', 'true'), ('suggest.q', query['q']), ('wt', 'json'), ) if query.get('dictionary'): params += (('suggest.dictionary', query['dictionary']), ) response = self._root.get('%s/suggest' % collection, params) return self._get_json(response) except RestException, e: raise PopupException(e, title=_('Error while accessing Solr'))
class SolrApi(object):
  """
  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """

  def __init__(self, solr_url=None, user=None, security_enabled=False, ssl_cert_ca_verify=SSL_CERT_CA_VERIFY.get()):
    # Fall back to the configured Solr URL when none was passed in.
    if solr_url is None and hasattr(SOLR_URL, 'get'):
      solr_url = SOLR_URL.get()

    # NOTE(review): when no URL can be resolved, none of the attributes below
    # are set and any later method call fails with AttributeError -- confirm
    # this is the intended behavior.
    if solr_url:
      self._url = solr_url
      self._user = user
      self._client = HttpClient(self._url, logger=LOG)
      self.security_enabled = security_enabled or SECURITY_ENABLED.get()

      if self.security_enabled:
        self._client.set_kerberos_auth()

      self._client.set_verify(ssl_cert_ca_verify)

      self._root = resource.Resource(self._client)

      # The Kerberos handshake requires two requests in order to authenticate,
      # but if our first request is a PUT/POST, it might flat-out reject the
      # first request if the body is too large. So, connect here in order to get
      # a cookie so future PUT/POSTs will be pre-authenticated.
      if self.security_enabled:
        self._root.invoke('HEAD', '/')

  def query(self, collection, query):
    # Build the full Solr select request (facets via the JSON Facet API,
    # filters, field list, MLT, highlighting, sort) for a dashboard
    # `collection` and user `query`, and return the parsed JSON response.
    solr_query = {}

    solr_query['collection'] = collection['name']

    if query.get('download'):
      solr_query['rows'] = 1000
      solr_query['start'] = 0
    else:
      solr_query['rows'] = int(collection['template']['rows'] or 10)
      solr_query['start'] = int(query['start'])

    # Hard caps to protect Solr from oversized pages / deep paging.
    solr_query['rows'] = min(solr_query['rows'], 1000)
    solr_query['start'] = min(solr_query['start'], 10000)

    params = self._get_params() + (
        ('q', self._get_q(query)),
        ('wt', 'json'),
        ('rows', solr_query['rows']),
        ('start', solr_query['start']),
    )

    if any(collection['facets']):
      params += (
        ('facet', 'true'),
        ('facet.mincount', 0),
        ('facet.limit', 10),
      )

      json_facets = {}

      timeFilter = self._get_range_borders(collection, query)

      for facet in collection['facets']:
        if facet['type'] == 'query':
          params += (('facet.query', '%s' % facet['field']),)
        elif facet['type'] == 'range' or facet['type'] == 'range-up':
          keys = {
              'id': '%(id)s' % facet,
              'field': facet['field'],
              'key': '%(field)s-%(id)s' % facet,
              'start': facet['properties']['start'],
              'end': facet['properties']['end'],
              'gap': facet['properties']['gap'],
              'mincount': int(facet['properties']['mincount'])
          }

          # Apply the current time filter unless it is an overridden
          # histogram widget.
          if timeFilter and timeFilter['time_field'] == facet['field'] and (facet['id'] not in timeFilter['time_filter_overrides'] or facet['widgetType'] != 'histogram-widget'):
            keys.update(self._get_time_filter_query(timeFilter, facet))

          params += (
             ('facet.range', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.range.start=%(start)s f.%(field)s.facet.range.end=%(end)s f.%(field)s.facet.range.gap=%(gap)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys),
          )
        elif facet['type'] == 'field':
          keys = {
              'id': '%(id)s' % facet,
              'field': facet['field'],
              'key': '%(field)s-%(id)s' % facet,
              # facet-widget asks for one extra row to detect "has more".
              'limit': int(facet['properties'].get('limit', 10)) + (1 if facet['widgetType'] == 'facet-widget' else 0),
              'mincount': int(facet['properties']['mincount'])
          }

          params += (
              ('facet.field', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys),
          )
        elif facet['type'] == 'nested':
          _f = {}

          if facet['properties']['facets']:
            self._n_facet_dimension(facet, _f, facet['properties']['facets'], 1, timeFilter)

          if facet['properties'].get('domain'):
            if facet['properties']['domain'].get('blockParent') or facet['properties']['domain'].get('blockChildren'):
              _f['domain'] = {}
              if facet['properties']['domain'].get('blockParent'):
                _f['domain']['blockParent'] = ' OR '.join(facet['properties']['domain']['blockParent'])
              if facet['properties']['domain'].get('blockChildren'):
                _f['domain']['blockChildren'] = ' OR '.join(facet['properties']['domain']['blockChildren'])

          if _f:
            # Default sort is bucket count; an explicit non-default aggregate
            # sort overrides it.
            sort = {'count': facet['properties']['facets'][0]['sort']}
            for i, agg in enumerate(self._get_dimension_aggregates(facet['properties']['facets'][1:])):
              if agg['sort'] != 'default':
                agg_function = self._get_aggregate_function(agg)
                sort = {'agg_%02d_%02d:%s' % (1, i, agg_function): agg['sort']}
            if sort.get('count') == 'default':
              sort['count'] = 'desc'

            # The first dimension built by _n_facet_dimension becomes the
            # top-level json.facet entry for this widget.
            dim_key = [key for key in _f['facet'].keys() if 'dim' in key][0]
            _f['facet'][dim_key].update({
                  'excludeTags': facet['id'],
                  'offset': 0,
                  'numBuckets': True,
                  'allBuckets': True,
                  'sort': sort
                  #'prefix': '' # Forbidden on numeric fields
              })
            json_facets[facet['id']] = _f['facet'][dim_key]
        elif facet['type'] == 'function':
          if facet['properties']['facets']:
            json_facets[facet['id']] = self._get_aggregate_function(facet['properties']['facets'][0])
            if facet['properties']['compare']['is_enabled']:
              # TODO: global compare override
              unit = re.split('\d+', facet['properties']['compare']['gap'])[1]
              json_facets[facet['id']] = {
                'type': 'range',
                'field': collection['timeFilter'].get('field'),
                'start': 'NOW/%s-%s-%s' % (unit, facet['properties']['compare']['gap'], facet['properties']['compare']['gap']),
                'end': 'NOW/%s' % unit,
                'gap': '+%(gap)s' % facet['properties']['compare'],
                'facet': {facet['id']: json_facets[facet['id']]}
              }
            if facet['properties']['filter']['is_enabled']:
              json_facets[facet['id']] = {
                'type': 'query',
                'q': facet['properties']['filter']['query'] or EMPTY_QUERY.get(),
                'facet': {facet['id']: json_facets[facet['id']]}
              }
            json_facets['processEmpty'] = True
        elif facet['type'] == 'pivot':
          if facet['properties']['facets'] or facet['widgetType'] == 'map-widget':
            fields = facet['field']
            fields_limits = []
            for f in facet['properties']['facets']:
              fields_limits.append('f.%s.facet.limit=%s' % (f['field'], f['limit']))
              fields_limits.append('f.%s.facet.mincount=%s' % (f['field'], f['mincount']))
              fields += ',' + f['field']
            keys = {
                'id': '%(id)s' % facet,
                'key': '%(field)s-%(id)s' % facet,
                'field': facet['field'],
                'fields': fields,
                'limit': int(facet['properties'].get('limit', 10)),
                'mincount': int(facet['properties']['mincount']),
                'fields_limits': ' '.join(fields_limits)
            }
            params += (
                ('facet.pivot', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s %(fields_limits)s}%(fields)s' % keys),
            )

      if json_facets:
        params += (
            ('json.facet', json.dumps(json_facets)),
        )

    params += self._get_fq(collection, query)

    fl = urllib.unquote(utf_quoter(','.join(Collection2.get_field_list(collection))))

    nested_fields = self._get_nested_fields(collection)
    if nested_fields:
      fl += urllib.unquote(utf_quoter(',[child parentFilter="%s"]' % ' OR '.join(nested_fields)))

    # NOTE(review): `fl` is a string here, so `fl != ['*']` is always True --
    # looks like `fl != '*'` was intended; confirm before changing.
    if collection['template']['moreLikeThis'] and fl != ['*']: # Potential conflict with nested documents
      id_field = collection.get('idField', 'id')
      params += (
        ('mlt', 'true'),
        ('mlt.fl', fl.replace(',%s' % id_field, '')),
        ('mlt.mintf', 1),
        ('mlt.mindf', 1),
        ('mlt.maxdf', 50),
        ('mlt.maxntp', 1000),
        ('mlt.count', 10),
        #('mlt.minwl', 1),
        #('mlt.maxwl', 1),
      )
      fl = '*'

    params += (('fl', fl),)

    params += (
      ('hl', 'true'),
      ('hl.fl', '*'),
      ('hl.snippets', 5),
      ('hl.fragsize', 1000),
    )

    if collection['template']['fieldsSelected']:
      fields = []
      for field in collection['template']['fieldsSelected']:
        attribute_field = filter(lambda attribute: field == attribute['name'], collection['template']['fieldsAttributes'])
        if attribute_field:
          if attribute_field[0]['sort']['direction']:
            fields.append('%s %s' % (field, attribute_field[0]['sort']['direction']))
      if fields:
        params += (
          ('sort', ','.join(fields)),
        )

    response = self._root.get('%(collection)s/select' % solr_query, params)

    return self._get_json(response)

  def _n_facet_dimension(self, widget, _f, facets, dim, timeFilter):
    # Recursively build the nested JSON facet for dimension `dim` from the
    # remaining `facets`, mutating `_f` in place.
    facet = facets[0]
    f_name = 'dim_%02d:%s' % (dim, facet['field'])

    if facet['aggregate']['function'] == 'count':
      if 'facet' not in _f:
        _f['facet'] = {f_name: {}}
      else:
        _f['facet'][f_name] = {}
      _f = _f['facet']

      # Default sort is bucket count; a non-default aggregate sort overrides it.
      sort = {'count': facet['sort']}
      for i, agg in enumerate(self._get_dimension_aggregates(facets)):
        if agg['sort'] != 'default':
          agg_function = self._get_aggregate_function(agg)
          sort = {'agg_%02d_%02d:%s' % (dim, i, agg_function): agg['sort']}
      if sort.get('count') == 'default':
        sort['count'] = 'desc'

      _f[f_name] = {
          'type': 'terms',
          'field': '%(field)s' % facet,
          'limit': int(facet.get('limit', 10)),
          'mincount': int(facet['mincount']),
          'numBuckets': True,
          'allBuckets': True,
          'sort': sort,
          'missing': facet.get('missing', False)
          #'prefix': '' # Forbidden on numeric fields
      }

      if 'start' in facet and not facet.get('type') == 'field':
        _f[f_name].update({
            'type': 'range',
            'start': facet['start'],
            'end': facet['end'],
            'gap': facet['gap']
        })

        # Only on dim 1 currently
        if timeFilter and timeFilter['time_field'] == facet['field'] and (widget['id'] not in timeFilter['time_filter_overrides']): # or facet['widgetType'] != 'bucket-widget'):
          facet['widgetType'] = widget['widgetType']
          _f[f_name].update(self._get_time_filter_query(timeFilter, facet))

      if widget['widgetType'] == 'tree2-widget' and facets[-1]['aggregate']['function'] != 'count':
        _f['subcount'] = self._get_aggregate_function(facets[-1])

      if len(facets) > 1: # Get n+1 dimension
        if facets[1]['aggregate']['function'] == 'count':
          self._n_facet_dimension(widget, _f[f_name], facets[1:], dim + 1, timeFilter)
        else:
          self._n_facet_dimension(widget, _f[f_name], facets[1:], dim, timeFilter)
    else:
      # Leaf level: emit the aggregate functions for this dimension.
      agg_function = self._get_aggregate_function(facet)
      _f['facet'] = {
          'agg_%02d_00:%s' % (dim, agg_function): agg_function
      }
      for i, _f_agg in enumerate(facets[1:], 1):
        if _f_agg['aggregate']['function'] != 'count':
          agg_function = self._get_aggregate_function(_f_agg)
          _f['facet']['agg_%02d_%02d:%s' % (dim, i, agg_function)] = agg_function
        else:
          self._n_facet_dimension(widget, _f, facets[i:], dim + 1, timeFilter) # Get n+1 dimension
          break

  def select(self, collection, query=None, rows=100, start=0):
    # Plain select on a collection/core, defaulting to the configured
    # empty query when none is given.
    if query is None:
      query = EMPTY_QUERY.get()

    params = self._get_params() + (
        ('q', query),
        ('wt', 'json'),
        ('rows', rows),
        ('start', start),
    )

    response = self._root.get('%s/select' % collection, params)
    return self._get_json(response)

  def suggest(self, collection, query):
    # Run the Solr suggester on `collection` for the typed query string.
    try:
      params = self._get_params() + (
          ('suggest', 'true'),
          ('suggest.build', 'true'),
          ('suggest.q', query['q']),
          ('wt', 'json'),
      )
      if query.get('dictionary'):
        params += (
            ('suggest.dictionary', query['dictionary']),
        )
      response = self._root.get('%s/suggest' % collection, params)
      return self._get_json(response)
    except RestException, e:
      raise PopupException(e, title=_('Error while accessing Solr'))
class ResourceManagerApi(object):
  """
  Client for the YARN ResourceManager REST API (``ws/<version>``), bound to a
  single user fixed at construction time.
  """

  def __init__(self, username, rm_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._username = username
    self._url = posixpath.join(rm_url, 'ws', _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def _get_params(self):
    """Common request parameters: impersonation (doAs) when not the default user."""
    params = {}

    if self._username != DEFAULT_USER.get(): # We impersonate if needed
      params['doAs'] = self._username
      if not self.security_enabled:
        params['user.name'] = DEFAULT_USER.get()

    return params

  def __str__(self):
    return "ResourceManagerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def cluster(self, **kwargs):
    params = self._get_params()
    return self._execute(self._root.get, 'cluster/info', params=params, headers={'Accept': _JSON_CONTENT_TYPE})

  def apps(self, **kwargs):
    params = self._get_params()
    params.update(kwargs)
    return self._execute(self._root.get, 'cluster/apps', params=params, headers={'Accept': _JSON_CONTENT_TYPE})

  def app(self, app_id):
    params = self._get_params()
    return self._execute(self._root.get, 'cluster/apps/%(app_id)s' % {'app_id': app_id}, params=params, headers={'Accept': _JSON_CONTENT_TYPE})

  def kill(self, app_id):
    """Ask the RM to move the application to the KILLED state."""
    data = {'state': 'KILLED'}
    token = None

    # Tokens are managed within the kill method but should be moved out when not alpha anymore or we support submitting an app.
    if self.security_enabled and False: # NOTE: branch deliberately disabled while alpha -- see comment above.
      full_token = self.delegation_token()
      if 'token' not in full_token:
        raise PopupException(_('YARN did not return any token field.'), detail=smart_str(full_token))
      data['X-Hadoop-Delegation-Token'] = token = full_token.pop('token')
      LOG.debug('Received delegation token %s' % full_token)

    try:
      params = self._get_params()
      return self._execute(self._root.put, 'cluster/apps/%(app_id)s/state' % {'app_id': app_id}, params=params, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE)
    finally:
      if token:
        self.cancel_token(token)

  def delegation_token(self):
    params = self._get_params()
    data = {'renewer': self._username}
    return self._execute(self._root.post, 'cluster/delegation-token', params=params, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE)

  def cancel_token(self, token):
    params = self._get_params()
    headers = {'Hadoop-YARN-RM-Delegation-Token': token}
    # Bug fix: the format string was missing its %s placeholder, so the
    # '%' operation raised TypeError ("not all arguments converted").
    LOG.debug('Canceling delegation token of %s' % self._username)
    return self._execute(self._root.delete, 'cluster/delegation-token', params=params, headers=headers)

  def _execute(self, function, *args, **kwargs):
    """Run a Resource call, converting a standby-RM HTML answer into an exception."""
    response = function(*args, **kwargs)
    # YARN-2605: Yarn does not use proper HTTP redirects when the standby RM has
    # failed back to the master RM.
    if isinstance(response, str) and response.startswith('This is standby RM. Redirecting to the current active RM'):
      raise YarnFailoverOccurred(response)
    return response
class MapreduceApi(object):
  """
  Client for the MapReduce application REST API, reached through the
  ResourceManager web proxy (``.../proxy/<app_id>/ws/<version>/mapreduce/...``).
  """

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(oozie_url, 'proxy')
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "MapreduceApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  def _get_job_path(self, job_id, suffix=''):
    # Build the proxied REST path for a job. Normalizes the id so either a
    # 'job_...' or an 'application_...' id is accepted (previously only
    # task_counters/task_attempt did this; the other endpoints built a wrong
    # path for 'application_...' ids).
    app_id = job_id.replace('job', 'application')
    return '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s%(suffix)s' % {
        'app_id': app_id,
        'job_id': app_id.replace('application', 'job'),
        'version': _API_VERSION,
        'suffix': suffix,
    }

  def _get(self, path):
    # Every endpoint requests JSON explicitly; the proxy can otherwise answer HTML.
    return self._root.get(path, headers={'Accept': _JSON_CONTENT_TYPE})

  def job(self, user, job_id):
    # `user` is unused but kept for backward compatibility with callers.
    return self._get(self._get_job_path(job_id))

  def counters(self, job_id):
    response = self._get(self._get_job_path(job_id, '/counters'))
    # If it hits the job history server, it will return HTML.
    # Simply return None in this case because there isn't much data there.
    if isinstance(response, basestring):
      return None
    else:
      return response

  def tasks(self, job_id):
    return self._get(self._get_job_path(job_id, '/tasks'))

  def job_attempts(self, job_id):
    return self._get(self._get_job_path(job_id, '/jobattempts'))

  def conf(self, job_id):
    return self._get(self._get_job_path(job_id, '/conf'))

  def task(self, job_id, task_id):
    return self._get(self._get_job_path(job_id, '/tasks/%s' % task_id))

  def task_counters(self, job_id, task_id):
    return self._get(self._get_job_path(job_id, '/tasks/%s/counters' % task_id))

  def task_attempts(self, job_id, task_id):
    return self._get(self._get_job_path(job_id, '/tasks/%s/attempts' % task_id))

  def task_attempt(self, job_id, task_id, attempt_id):
    return self._get(self._get_job_path(job_id, '/tasks/%s/attempts/%s' % (task_id, attempt_id)))

  def kill(self, job_id):
    app_id = job_id.replace('job', 'application')
    get_resource_manager().kill(app_id) # We need to call the RM
class SolrApi(object):
  """
  Thin client for the Solr HTTP API used by the dashboard: builds the
  query/facet/filter-query parameter tuples from a Hue "collection"
  definition and executes select/suggest requests.

  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """

  def __init__(self, solr_url, user, security_enabled=SECURITY_ENABLED.get(), ssl_cert_ca_verify=SSL_CERT_CA_VERIFY.get()):
    self._url = solr_url
    self._user = user
    self._client = HttpClient(self._url, logger=LOG)
    self.security_enabled = security_enabled

    if self.security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

    self._root = resource.Resource(self._client)

    # The Kerberos handshake requires two requests in order to authenticate,
    # but if our first request is a PUT/POST, it might flat-out reject the
    # first request if the body is too large. So, connect here in order to get
    # a cookie so future PUT/POSTs will be pre-authenticated.
    if self.security_enabled:
      self._root.invoke('HEAD', '/')

  def _get_params(self):
    """Base request params: impersonation via doAs (plus user.name when not kerberized)."""
    if self.security_enabled:
      return (('doAs', self._user),)
    return (('user.name', DEFAULT_USER), ('doAs', self._user),)

  def _get_q(self, query):
    """OR together the sub-queries; each wrapped in parens when there are 2+."""
    q_template = '(%s)' if len(query['qs']) >= 2 else '%s'
    # '(a)OR(b)' — no surrounding spaces needed, Solr accepts this form.
    return 'OR'.join([q_template % (q['q'] or EMPTY_QUERY.get()) for q in query['qs']]).encode('utf-8')

  def _get_aggregate_function(self, facet):
    """Map a facet definition to a Solr aggregate expression, e.g. sum(field)."""
    props = {
        'field': facet['field'],
        'aggregate': facet['properties']['aggregate'] if 'properties' in facet else facet['aggregate']
    }
    if props['aggregate'] == 'median':
      # Solr has no median(); percentile at 50 is the equivalent.
      return 'percentile(%(field)s,50)' % props
    else:
      return '%(aggregate)s(%(field)s)' % props

  def _get_range_borders(self, collection, query):
    """
    Compute the time-range window (from/to/gap) for the collection's time
    filter. Returns {} when no time filter applies. The GAPS table maps each
    rolling window size to a per-widget bucket gap (chosen to yield roughly
    100 slots for chart widgets and ~10 for facet widgets).
    """
    props = {}
    GAPS = {
        '5MINUTES': {
            'histogram-widget': {'coeff': '+3', 'unit': 'SECONDS'}, # ~100 slots
            'bucket-widget': {'coeff': '+3', 'unit': 'SECONDS'}, # ~100 slots
            'bar-widget': {'coeff': '+3', 'unit': 'SECONDS'}, # ~100 slots
            'facet-widget': {'coeff': '+1', 'unit': 'MINUTES'}, # ~10 slots
        },
        '30MINUTES': {
            'histogram-widget': {'coeff': '+20', 'unit': 'SECONDS'},
            'bucket-widget': {'coeff': '+20', 'unit': 'SECONDS'},
            'bar-widget': {'coeff': '+20', 'unit': 'SECONDS'},
            'facet-widget': {'coeff': '+5', 'unit': 'MINUTES'},
        },
        '1HOURS': {
            'histogram-widget': {'coeff': '+30', 'unit': 'SECONDS'},
            'bucket-widget': {'coeff': '+30', 'unit': 'SECONDS'},
            'bar-widget': {'coeff': '+30', 'unit': 'SECONDS'},
            'facet-widget': {'coeff': '+10', 'unit': 'MINUTES'},
        },
        '12HOURS': {
            'histogram-widget': {'coeff': '+7', 'unit': 'MINUTES'},
            'bucket-widget': {'coeff': '+7', 'unit': 'MINUTES'},
            'bar-widget': {'coeff': '+7', 'unit': 'MINUTES'},
            'facet-widget': {'coeff': '+1', 'unit': 'HOURS'},
        },
        '1DAYS': {
            'histogram-widget': {'coeff': '+15', 'unit': 'MINUTES'},
            'bucket-widget': {'coeff': '+15', 'unit': 'MINUTES'},
            'bar-widget': {'coeff': '+15', 'unit': 'MINUTES'},
            'facet-widget': {'coeff': '+3', 'unit': 'HOURS'},
        },
        '2DAYS': {
            'histogram-widget': {'coeff': '+30', 'unit': 'MINUTES'},
            'bucket-widget': {'coeff': '+30', 'unit': 'MINUTES'},
            'bar-widget': {'coeff': '+30', 'unit': 'MINUTES'},
            'facet-widget': {'coeff': '+6', 'unit': 'HOURS'},
        },
        '7DAYS': {
            'histogram-widget': {'coeff': '+3', 'unit': 'HOURS'},
            'bucket-widget': {'coeff': '+3', 'unit': 'HOURS'},
            'bar-widget': {'coeff': '+3', 'unit': 'HOURS'},
            'facet-widget': {'coeff': '+1', 'unit': 'DAYS'},
        },
        '1MONTHS': {
            'histogram-widget': {'coeff': '+12', 'unit': 'HOURS'},
            'bucket-widget': {'coeff': '+12', 'unit': 'HOURS'},
            'bar-widget': {'coeff': '+12', 'unit': 'HOURS'},
            'facet-widget': {'coeff': '+5', 'unit': 'DAYS'},
        },
        '3MONTHS': {
            'histogram-widget': {'coeff': '+1', 'unit': 'DAYS'},
            'bucket-widget': {'coeff': '+1', 'unit': 'DAYS'},
            'bar-widget': {'coeff': '+1', 'unit': 'DAYS'},
            'facet-widget': {'coeff': '+30', 'unit': 'DAYS'},
        },
        '1YEARS': {
            'histogram-widget': {'coeff': '+3', 'unit': 'DAYS'},
            'bucket-widget': {'coeff': '+3', 'unit': 'DAYS'},
            'bar-widget': {'coeff': '+3', 'unit': 'DAYS'},
            'facet-widget': {'coeff': '+12', 'unit': 'MONTHS'},
        },
        '2YEARS': {
            'histogram-widget': {'coeff': '+7', 'unit': 'DAYS'},
            'bucket-widget': {'coeff': '+7', 'unit': 'DAYS'},
            'bar-widget': {'coeff': '+7', 'unit': 'DAYS'},
            'facet-widget': {'coeff': '+3', 'unit': 'MONTHS'},
        },
        '10YEARS': {
            'histogram-widget': {'coeff': '+1', 'unit': 'MONTHS'},
            'bucket-widget': {'coeff': '+1', 'unit': 'MONTHS'},
            'bar-widget': {'coeff': '+1', 'unit': 'MONTHS'},
            'facet-widget': {'coeff': '+1', 'unit': 'YEARS'},
        }
    }

    time_field = collection['timeFilter'].get('field')

    if time_field and (collection['timeFilter']['value'] != 'all' or collection['timeFilter']['type'] == 'fixed'):
      # fqs overrides main time filter
      fq_time_ids = [fq['id'] for fq in query['fqs'] if fq['field'] == time_field]
      props['time_filter_overrides'] = fq_time_ids
      props['time_field'] = time_field

      if collection['timeFilter']['type'] == 'rolling':
        # Rolling window anchored at NOW, e.g. [NOW-7DAYS TO NOW].
        props['field'] = collection['timeFilter']['field']
        props['from'] = 'NOW-%s' % collection['timeFilter']['value']
        props['to'] = 'NOW'
        props['gap'] = GAPS.get(collection['timeFilter']['value'])
      elif collection['timeFilter']['type'] == 'fixed':
        # Explicit absolute window picked by the user.
        props['field'] = collection['timeFilter']['field']
        props['from'] = collection['timeFilter']['from']
        props['to'] = collection['timeFilter']['to']
        props['fixed'] = True

    return props

  def _get_time_filter_query(self, timeFilter, facet):
    """Build start/end/gap facet-range params for the given time window."""
    if 'fixed' in timeFilter:
      # Fixed window: derive a gap from the window size, then round the
      # borders down to the gap's unit (Solr date-math '/UNIT').
      props = {}
      stat_facet = {'min': timeFilter['from'], 'max': timeFilter['to']}
      _compute_range_facet(facet['widgetType'], stat_facet, props, stat_facet['min'], stat_facet['max'])
      gap = props['gap']
      unit = re.split('\d+', gap)[1]
      return {
        'start': '%(from)s/%(unit)s' % {'from': timeFilter['from'], 'unit': unit},
        'end': '%(to)s/%(unit)s' % {'to': timeFilter['to'], 'unit': unit},
        'gap': '%(gap)s' % props, # add a 'auto'
      }
    else:
      # Rolling window: the per-widget gap comes from the GAPS table.
      gap = timeFilter['gap'][facet['widgetType']]
      return {
        'start': '%(from)s/%(unit)s' % {'from': timeFilter['from'], 'unit': gap['unit']},
        'end': '%(to)s/%(unit)s' % {'to': timeFilter['to'], 'unit': gap['unit']},
        'gap': '%(coeff)s%(unit)s/%(unit)s' % gap, # add a 'auto'
      }

  def _get_fq(self, collection, query):
    """
    Translate the dashboard's filter queries into Solr 'fq' params, each
    tagged ({!tag=<id>}) so the matching facet can exclude it.
    """
    params = ()
    timeFilter = {}

    if collection:
      timeFilter = self._get_range_borders(collection, query)
    if timeFilter and not timeFilter.get('time_filter_overrides'):
      params += (('fq', urllib.unquote(utf_quoter('%(field)s:[%(from)s TO %(to)s]' % timeFilter))),)

    # Merge facets queries on same fields
    grouped_fqs = groupby(query['fqs'], lambda x: (x['type'], x['field']))
    merged_fqs = []
    for key, group in grouped_fqs:
      # NOTE(review): itertools.groupby only merges *adjacent* entries with
      # the same key — presumably fqs arrive grouped; verify against caller.
      field_fq = next(group)
      for fq in group:
        for f in fq['filter']:
          field_fq['filter'].append(f)
      merged_fqs.append(field_fq)

    for fq in merged_fqs:
      if fq['type'] == 'field':
        fields = fq['field'] if type(fq['field']) == list else [fq['field']] # 2D facets support
        for field in fields:
          f = []
          for _filter in fq['filter']:
            values = _filter['value'] if type(_filter['value']) == list else [_filter['value']] # 2D facets support
            if fields.index(field) < len(values): # Lowest common field denominator
              value = values[fields.index(field)]
              exclude = '-' if _filter['exclude'] else ''
              if value is not None and ' ' in force_unicode(value):
                # Quote values containing spaces; escape embedded quotes.
                value = force_unicode(value).replace('"', '\\"')
                f.append('%s%s:"%s"' % (exclude, field, value))
              else:
                f.append('%s{!field f=%s}%s' % (exclude, field, value))
          _params ='{!tag=%(id)s}' % fq + ' '.join(f)
          params += (('fq', urllib.unquote(utf_quoter(_params))),)
      elif fq['type'] == 'range':
        params += (('fq', '{!tag=%(id)s}' % fq + ' '.join([urllib.unquote(
                    utf_quoter('%s%s:[%s TO %s}' % ('-' if field['exclude'] else '', fq['field'], f['from'], f['to']))) for field, f in zip(fq['filter'], fq['properties'])])),)
      elif fq['type'] == 'range-up':
        # Open-ended range: [from TO *] when is_up, [* TO from] otherwise.
        params += (('fq', '{!tag=%(id)s}' % fq + ' '.join([urllib.unquote(
                    utf_quoter('%s%s:[%s TO %s}' % ('-' if field['exclude'] else '', fq['field'], f['from'] if fq['is_up'] else '*', '*' if fq['is_up'] else f['from']))) for field, f in zip(fq['filter'], fq['properties'])])),)
      elif fq['type'] == 'map':
        # Bounding-box filter on separate lat/lon fields.
        _keys = fq.copy()
        _keys.update(fq['properties'])
        params += (('fq', '{!tag=%(id)s}' % fq + urllib.unquote(
                    utf_quoter('%(lat)s:[%(lat_sw)s TO %(lat_ne)s} AND %(lon)s:[%(lon_sw)s TO %(lon_ne)s}' % _keys))),)

    return params

  def query(self, collection, query):
    """
    Run a dashboard search: assembles q/facet/fq/fl/hl/sort params from the
    collection definition and the user query, issues <collection>/select and
    returns the parsed JSON response.
    """
    solr_query = {}

    solr_query['collection'] = collection['name']

    if query.get('download'):
      solr_query['rows'] = 1000
      solr_query['start'] = 0
    else:
      solr_query['rows'] = int(collection['template']['rows'] or 10)
      solr_query['start'] = int(query['start'])

    # Hard caps to protect Solr from runaway paging.
    solr_query['rows'] = min(solr_query['rows'], 1000)
    solr_query['start'] = min(solr_query['start'], 10000)

    params = self._get_params() + (
        ('q', self._get_q(query)),
        ('wt', 'json'),
        ('rows', solr_query['rows']),
        ('start', solr_query['start']),
    )

    if any(collection['facets']):
      params += (
        ('facet', 'true'),
        ('facet.mincount', 0),
        ('facet.limit', 10),
      )
      json_facets = {}

      timeFilter = self._get_range_borders(collection, query)

      for facet in collection['facets']:
        if facet['type'] == 'query':
          params += (('facet.query', '%s' % facet['field']),)
        elif facet['type'] == 'range' or facet['type'] == 'range-up':
          keys = {
              'id': '%(id)s' % facet,
              'field': facet['field'],
              'key': '%(field)s-%(id)s' % facet,
              'start': facet['properties']['start'],
              'end': facet['properties']['end'],
              'gap': facet['properties']['gap'],
              'mincount': int(facet['properties']['mincount'])
          }

          # Align the facet range with the global time filter unless an fq
          # on the same field already overrides it (histogram keeps its own).
          if timeFilter and timeFilter['time_field'] == facet['field'] and (facet['id'] not in timeFilter['time_filter_overrides'] or facet['widgetType'] != 'histogram-widget'):
            keys.update(self._get_time_filter_query(timeFilter, facet))

          params += (
             ('facet.range', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.range.start=%(start)s f.%(field)s.facet.range.end=%(end)s f.%(field)s.facet.range.gap=%(gap)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys),
          )
        elif facet['type'] == 'field':
          keys = {
              'id': '%(id)s' % facet,
              'field': facet['field'],
              'key': '%(field)s-%(id)s' % facet,
              # +1 extra row lets the facet widget detect "more values exist".
              'limit': int(facet['properties'].get('limit', 10)) + (1 if facet['widgetType'] == 'facet-widget' else 0),
              'mincount': int(facet['properties']['mincount'])
          }

          params += (
              ('facet.field', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys),
          )
        elif facet['type'] == 'nested':
          # Nested (2D/3D) facets go through the JSON Facet API instead of
          # classic facet params.
          _f = {
              'field': facet['field'],
              'limit': int(facet['properties'].get('limit', 10)) + (1 if facet['widgetType'] == 'facet-widget' else 0),
              'mincount': int(facet['properties']['mincount'])
          }

          if 'start' in facet['properties']:
            _f.update({
                'type': 'range',
                'start': facet['properties']['start'],
                'end': facet['properties']['end'],
                'gap': facet['properties']['gap'],
            })
            if timeFilter and timeFilter['time_field'] == facet['field'] and (facet['id'] not in timeFilter['time_filter_overrides'] or facet['widgetType'] != 'bucket-widget'):
              _f.update(self._get_time_filter_query(timeFilter, facet))
          else:
            _f.update({
                'type': 'terms',
                'field': facet['field'],
                'excludeTags': facet['id']
            })

          if facet['properties']['facets']:
            if facet['properties']['facets'][0]['aggregate'] == 'count':
              _f['facet'] = {
                  'd2': {
                      'type': 'terms',
                      'field': '%(field)s' % facet['properties']['facets'][0],
                      'limit': int(facet['properties']['facets'][0].get('limit', 10)),
                      'mincount': int(facet['properties']['facets'][0]['mincount'])
                  }
              }
              if len(facet['properties']['facets']) > 1: # Get 3rd dimension calculation
                _f['facet']['d2']['facet'] = {
                    'd2': self._get_aggregate_function(facet['properties']['facets'][1])
                }
            else:
              _f['facet'] = {
                  'd2': self._get_aggregate_function(facet['properties']['facets'][0])
              }

          json_facets[facet['id']] = _f
        elif facet['type'] == 'function':
          json_facets[facet['id']] = self._get_aggregate_function(facet)
          json_facets['processEmpty'] = True
        elif facet['type'] == 'pivot':
          if facet['properties']['facets'] or facet['widgetType'] == 'map-widget':
            fields = facet['field']
            fields_limits = []
            for f in facet['properties']['facets']:
              fields_limits.append('f.%s.facet.limit=%s' % (f['field'], f['limit']))
              fields_limits.append('f.%s.facet.mincount=%s' % (f['field'], f['mincount']))
              fields += ',' + f['field']
            keys = {
                'id': '%(id)s' % facet,
                'key': '%(field)s-%(id)s' % facet,
                'field': facet['field'],
                'fields': fields,
                'limit': int(facet['properties'].get('limit', 10)),
                'mincount': int(facet['properties']['mincount']),
                'fields_limits': ' '.join(fields_limits)
            }
            params += (
                ('facet.pivot', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s %(fields_limits)s}%(fields)s' % keys),
            )

      if json_facets:
        params += (
            ('json.facet', json.dumps(json_facets)),
        )

    params += self._get_fq(collection, query)

    if collection['template']['fieldsSelected'] and collection['template']['isGridLayout']:
      # NOTE(review): the conditional expression binds the *whole* sum, i.e.
      # set(A + [id] if id else []) — when idField is falsy, fieldsSelected
      # is discarded too. Likely intended: set(A + ([id] if id else [])).
      # Left as-is here (documentation-only pass); confirm and fix upstream.
      fields = set(collection['template']['fieldsSelected'] + [collection['idField']] if collection['idField'] else []) # Add field if needed
      if collection['template']['leafletmap'].get('latitudeField'):
        fields.add(collection['template']['leafletmap']['latitudeField'])
      if collection['template']['leafletmap'].get('longitudeField'):
        fields.add(collection['template']['leafletmap']['longitudeField'])
      if collection['template']['leafletmap'].get('labelField'):
        fields.add(collection['template']['leafletmap']['labelField'])
      params += (('fl', urllib.unquote(utf_quoter(','.join(list(fields))))),)
    else:
      params += (('fl', '*'),)

    params += (
      ('hl', 'true'),
      ('hl.fl', '*'),
      ('hl.snippets', 5),
      ('hl.fragsize', 1000),
    )

    if collection['template']['fieldsSelected']:
      fields = []
      for field in collection['template']['fieldsSelected']:
        # Python 2: filter() returns a list here, so indexing is safe.
        attribute_field = filter(lambda attribute: field == attribute['name'], collection['template']['fieldsAttributes'])
        if attribute_field:
          if attribute_field[0]['sort']['direction']:
            fields.append('%s %s' % (field, attribute_field[0]['sort']['direction']))
      if fields:
        params += (
          ('sort', ','.join(fields)),
        )

    response = self._root.get('%(collection)s/select' % solr_query, params)

    # _get_json is defined elsewhere on this class (outside this view).
    return self._get_json(response)

  def suggest(self, collection, query):
    """Query the Solr suggester for collection with query['q'] (and optional dictionary)."""
    try:
      params = self._get_params() + (
          ('suggest', 'true'),
          ('suggest.build', 'true'),
          ('suggest.q', query['q']),
          ('wt', 'json'),
      )
      if query.get('dictionary'):
        params += (
            ('suggest.dictionary', query['dictionary']),
        )
      response = self._root.get('%s/suggest' % collection, params)
      return self._get_json(response)
    except RestException, e:
      raise PopupException(e, title=_('Error while accessing Solr'))
class SparkJob(Application):
  """
  YARN Application specialized for Spark: resolves the real tracking URL
  through the RM proxy redirect and scrapes executor metrics from the Spark
  History Server for finished/running apps.
  """

  def __init__(self, job, rm_api=None, hs_api=None):
    super(SparkJob, self).__init__(job, rm_api)
    self._resolve_tracking_url()
    # Metrics only exist once the app is actually running / finished.
    if self.status not in ('NEW', 'SUBMITTED', 'ACCEPTED') and hs_api:
      self.history_server_api = hs_api
      self._get_metrics()

  @property
  def logs_url(self):
    # get_executors_loglinks is provided by the history server API object
    # (not visible in this file section).
    log_links = self.history_server_api.get_executors_loglinks(self)
    return log_links['stdout'] if log_links and 'stdout' in log_links else ''

  @property
  def attempt_id(self):
    # Last path segment of the tracking URL, e.g. .../application_x/1 -> '1'.
    return self.trackingUrl.strip('/').split('/')[-1]

  def _resolve_tracking_url(self):
    """Follow the RM proxy redirect to the app's actual Spark UI URL."""
    resp = None
    try:
      self._client = HttpClient(self.trackingUrl, logger=LOG)
      self._root = Resource(self._client)
      yarn_cluster = cluster.get_cluster_conf_for_job_submission()
      self._security_enabled = yarn_cluster.SECURITY_ENABLED.get()
      if self._security_enabled:
        self._client.set_kerberos_auth()

      self._client.set_verify(yarn_cluster.SSL_CERT_CA_VERIFY.get())
      actual_url = self._execute(self._root.resolve_redirect_url)

      # Strip a trailing '/jobs' segment so the URL points at the app root.
      if actual_url.strip('/').split('/')[-1] == 'jobs':
        actual_url = actual_url.strip('/').replace('jobs', '')
      self.trackingUrl = actual_url
      LOG.debug("SparkJob tracking URL: %s" % self.trackingUrl)
    except Exception as e:
      # Best-effort: keep the original trackingUrl on any failure.
      LOG.warn("Failed to resolve Spark Job's actual tracking URL: %s" % e)
    finally:
      # NOTE(review): resp is never assigned in the try block, so this
      # cleanup is dead code — probably a leftover from an earlier version.
      if resp is not None:
        resp.close()

  def _execute(self, function, *args, **kwargs):
    """Call function, logging (not raising) on failure; returns None on error."""
    response = None
    try:
      response = function(*args, **kwargs)
    except Exception as e:
      LOG.warn('Spark resolve tracking URL returned a failed response: %s' % e)
    return response

  def _get_metrics(self):
    """Populate self.metrics (headers + one row per executor) from the history server."""
    self.metrics = {}
    try:
      executors = self.history_server_api.executors(self)
      if executors:
        self.metrics['headers'] = [
          _('Executor Id'),
          _('Address'),
          _('RDD Blocks'),
          _('Storage Memory'),
          _('Disk Used'),
          _('Active Tasks'),
          _('Failed Tasks'),
          _('Complete Tasks'),
          _('Task Time'),
          _('Input'),
          _('Shuffle Read'),
          _('Shuffle Write'),
          _('Logs')
        ]
        self.metrics['executors'] = []
        for e in executors:
          self.metrics['executors'].append([
            e.get('id', 'N/A'),
            e.get('hostPort', ''),
            e.get('rddBlocks', ''),
            '%s / %s' % (big_filesizeformat(e.get('memoryUsed', 0)), big_filesizeformat(e.get('maxMemory', 0))),
            big_filesizeformat(e.get('diskUsed', 0)),
            e.get('activeTasks', ''),
            e.get('failedTasks', ''),
            e.get('completedTasks', ''),
            format_duration_in_millis(e.get('totalDuration', 0)),
            big_filesizeformat(e.get('totalInputBytes', 0)),
            big_filesizeformat(e.get('totalShuffleRead', 0)),
            big_filesizeformat(e.get('totalShuffleWrite', 0)),
            e.get('executorLogs', '')
          ])
    except Exception as e:
      LOG.error('Failed to get Spark Job executors: %s' % e)
      # Prevent a nosedive. Don't create metrics if api changes or url is unreachable.

  def get_executors(self):
    """Re-shape the metrics rows into a list of dicts keyed by snake_case headers."""
    executor_list = []
    if hasattr(self, 'metrics') and 'executors' in self.metrics:
      executors = self.metrics['executors']
      headers = [
        'executor_id',
        'address',
        'rdd_blocks',
        'storage_memory',
        'disk_used',
        'active_tasks',
        'failed_tasks',
        'complete_tasks',
        'task_time',
        'input',
        'shuffle_read',
        'shuffle_write',
        'logs'
      ]
      for executor in executors:
        executor_data = dict(zip(headers, executor))
        executor_data.update({
          'id': executor_data['executor_id'] + '_executor_' + self.jobId,
          'type': 'SPARK_EXECUTOR'
        })
        executor_list.append(executor_data)
    return executor_list
class MapreduceApi(object):
  """
  REST client for the MapReduce Application Master, reached through the
  ResourceManager proxy (<mr_url>/proxy/<app_id>/ws/...). The acting user is
  stored per-thread so one shared instance can impersonate many users.
  """

  def __init__(self, mr_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(mr_url, 'proxy')
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    self._thread_local = threading.local() # To store user info

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "MapreduceApi at %s" % (self._url,)

  def _get_params(self):
    """doAs/user.name impersonation params based on the thread-local user."""
    params = {}
    if self.username != DEFAULT_USER.get(): # We impersonate if needed
      params['doAs'] = self.username
      if not self._security_enabled:
        params['user.name'] = DEFAULT_USER.get()
    return params

  @property
  def url(self):
    return self._url

  @property
  def username(self):
    # Falls back to the default user until setuser() has been called
    # on this thread.
    try:
      return self._thread_local.user
    except AttributeError:
      return DEFAULT_USER.get()

  def setuser(self, user):
    """Set the acting user for this thread; returns the previous one."""
    curr = self.username
    self._thread_local.user = user
    return curr

  def job(self, user, job_id):
    """Fetch a job. The `user` arg is unused; impersonation uses _get_params()."""
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def counters(self, job_id):
    """Fetch job counters; returns None when the response is HTML (history server)."""
    app_id = job_id.replace('job', 'application')
    response = self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/counters' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})
    # If it hits the job history server, it will return HTML.
    # Simply return None in this case because there isn't much data there.
    if isinstance(response, basestring):
      return None
    else:
      return response

  def tasks(self, job_id):
    """List the tasks of a job."""
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def job_attempts(self, job_id):
    """List the attempts of a job."""
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/jobattempts' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def conf(self, job_id):
    """Fetch the job configuration."""
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/conf' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task(self, job_id, task_id):
    """Fetch a single task."""
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s' % {'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_counters(self, job_id, task_id):
    """Fetch the counters of a single task."""
    app_id = job_id.replace('job', 'application')
    # No-op for ids already in 'job_...' form; normalizes accidental app ids.
    job_id = job_id.replace('application', 'job')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/counters' % {'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempts(self, job_id, task_id):
    """List the attempts of a single task."""
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts' % {'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempt(self, job_id, task_id, attempt_id):
    """Fetch a single task attempt."""
    app_id = job_id.replace('job', 'application')
    job_id = job_id.replace('application', 'job')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s' % {'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def kill(self, job_id):
    """Kill the job via the ResourceManager (the AM has no kill endpoint)."""
    app_id = job_id.replace('job', 'application')
    get_resource_manager(self.username).kill(app_id) # We need to call the RM
class LivyClient(object):
  """
  REST client for the Apache Livy server: interactive sessions
  (/sessions/...) and batch jobs (/batches/...). The proxy user is stored
  per-thread via setuser().
  """

  def __init__(self, livy_url):
    # NOTE(review): posixpath.join with a single argument is a no-op;
    # probably a leftover from joining a path segment.
    self._url = posixpath.join(livy_url)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = SECURITY_ENABLED.get()
    self._csrf_enabled = CSRF_ENABLED.get()
    self._thread_local = threading.local()

    if self.security_enabled:
      self._client.set_kerberos_auth()

    if self.csrf_enabled:
      # Livy's CSRF protection requires the X-Requested-By header.
      self._client.set_headers({'X-Requested-By': 'hue'})

    self._client.set_verify(SSL_CERT_CA_VERIFY.get())

  def __str__(self):
    return "LivyClient at %s" % (self._url, )

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  @property
  def csrf_enabled(self):
    return self._csrf_enabled

  @property
  def user(self):
    # NOTE(review): raises AttributeError if setuser() was never called on
    # this thread — callers are expected to set the user first.
    return self._thread_local.user

  def setuser(self, user):
    """Store the acting user (object with .username, or plain string) for this thread."""
    if hasattr(user, 'username'):
      self._thread_local.user = user.username
    else:
      self._thread_local.user = user

  def get_status(self):
    return self._root.get('sessions')

  def get_log(self, uuid, startFrom=None, size=None):
    """Return the session log lines joined with newlines."""
    params = {}

    if startFrom is not None:
      params['from'] = startFrom

    if size is not None:
      params['size'] = size

    response = self._root.get('sessions/%s/log' % uuid, params=params)

    return '\n'.join(response['log'])

  def create_session(self, **properties):
    """Create an interactive session impersonating the current user (realm stripped)."""
    properties['proxyUser'] = self.user.split('@')[0]
    if has_connectors(): # Only SQL supported via connectors currently
      properties['kind'] = 'sql'

    return self._root.post('sessions', data=json.dumps(properties), contenttype=_JSON_CONTENT_TYPE)

  def get_sessions(self):
    return self._root.get('sessions')

  def get_session(self, uuid):
    return self._root.get('sessions/%s' % uuid)

  def get_statements(self, uuid):
    return self._root.get('sessions/%s/statements' % uuid)

  def submit_statement(self, uuid, statement):
    data = {'code': statement}
    return self._root.post('sessions/%s/statements' % uuid, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE)

  def inspect(self, uuid, statement):
    data = {'code': statement}
    return self._root.post('sessions/%s/inspect' % uuid, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE)

  def fetch_data(self, session, statement):
    return self._root.get('sessions/%s/statements/%s' % (session, statement))

  def cancel(self, session):
    return self._root.post('sessions/%s/interrupt' % session)

  def close(self, uuid):
    return self._root.delete('sessions/%s' % uuid)

  def get_batches(self):
    return self._root.get('batches')

  def submit_batch(self, properties):
    # Unlike create_session, the full user string is used here (no realm strip).
    properties['proxyUser'] = self.user

    return self._root.post('batches', data=json.dumps(properties), contenttype=_JSON_CONTENT_TYPE)

  def get_batch(self, uuid):
    return self._root.get('batches/%s' % uuid)

  def get_batch_status(self, uuid):
    response = self._root.get('batches/%s/state' % uuid)
    return response['state']

  def get_batch_log(self, uuid, startFrom=None, size=None):
    """Return the batch log lines joined with newlines."""
    params = {}

    if startFrom is not None:
      params['from'] = startFrom

    if size is not None:
      params['size'] = size

    response = self._root.get('batches/%s/log' % uuid, params=params)

    return '\n'.join(response['log'])

  def close_batch(self, uuid):
    return self._root.delete('batches/%s' % uuid)
class SqoopClient(object):
  """
  REST client for the Sqoop 2 server: CRUD for links and jobs, plus job
  start/stop/status and submission listing. Responses are converted to the
  project's model objects (Link, Job, Submission, Connector, Driver).
  """

  # Submission statuses considered successful vs failed.
  STATUS_GOOD = ('FINE', 'ACCEPTABLE')
  STATUS_BAD = ('UNACCEPTABLE', 'FAILURE_ON_SUBMIT')

  def __init__(self, url, username, language='en', ssl_cert_ca_verify=False):
    self._url = url
    self._client = HttpClient(self._url, logger=LOG)
    self._root = SqoopResource(self._client)
    self._language = language
    self._username = username

    # NOTE(review): has_sqoop_has_security() is called twice; harmless but
    # could be hoisted into one local.
    if has_sqoop_has_security():
      self._client.set_kerberos_auth()
    self._security_enabled = has_sqoop_has_security()
    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "SqoopClient at %s with security %s" % (self._url, self._security_enabled)

  @property
  def url(self):
    return self._url

  @property
  def headers(self):
    # Sqoop identifies the acting user via the sqoop-user-name header.
    return {
      'Accept': 'application/json',
      'Accept-Language': self._language,
      'sqoop-user-name': self._username
    }

  def get_version(self):
    return self._root.get('version', headers=self.headers)

  def get_driver(self):
    resp_dict = self._root.get('%s/driver' % API_VERSION, headers=self.headers)
    driver = Driver.from_dict(resp_dict)
    return driver

  def get_connectors(self):
    resp_dict = self._root.get('%s/connectors' % API_VERSION, headers=self.headers)
    connectors = [Connector.from_dict(connector_dict) for connector_dict in resp_dict['connectors']]
    return connectors

  def get_connector(self, connector_id):
    resp_dict = self._root.get('%s/connector/%d/' % (API_VERSION, connector_id), headers=self.headers)
    if resp_dict['connector']:
      return Connector.from_dict(resp_dict['connector'])
    return None

  def get_links(self):
    resp_dict = self._root.get('%s/links' % API_VERSION, headers=self.headers)
    links = [Link.from_dict(link_dict) for link_dict in resp_dict['links']]
    return links

  def get_link(self, link_id):
    resp_dict = self._root.get('%s/link/%d/' % (API_VERSION, link_id), headers=self.headers)
    if resp_dict['link']:
      return Link.from_dict(resp_dict['link'])
    return None

  def create_link(self, link):
    """Create a link; stamps creation/update times (epoch millis) and assigns the server id."""
    link.creation_date = int( round(time.time() * 1000) )
    link.update_date = link.creation_date
    link_dict = link.to_dict()
    request_dict = {
      'link': link_dict
    }
    resp = self._root.post('%s/link/' % API_VERSION, data=json.dumps(request_dict), headers=self.headers)

    # Lame check that iterates to make sure we have an error
    # Server responds with: {'validation-result': [{},{}]} or {'validation-result': [{KEY: ERROR},{KEY: ERROR}]}
    for result in resp['validation-result']:
      if result:
        raise SqoopException.from_dicts(resp['validation-result'])

    link.id = resp['id']
    return link

  def update_link(self, link):
    """Update a link in place; backfills link_config_values from the first connector if missing."""
    if not link.link_config_values:
      link.link_config_values = self.get_connectors()[0].link_config
    link.updated = int( round(time.time() * 1000) )
    link_dict = link.to_dict()
    request_dict = {
      'link': link_dict
    }
    resp = self._root.put('%s/link/%d/' % (API_VERSION, link.id), data=json.dumps(request_dict), headers=self.headers)

    # Lame check that iterates to make sure we have an error
    # Server responds with: {'validation-result': [{},{}]} or {'validation-result': [{KEY: ERROR},{KEY: ERROR}]}
    for result in resp['validation-result']:
      if result:
        raise SqoopException.from_dicts(resp['validation-result'])

    return link

  def delete_link(self, link):
    resp = self._root.delete('%s/link/%d/' % (API_VERSION, link.id), headers=self.headers)
    return None

  def get_jobs(self):
    resp_dict = self._root.get('%s/jobs' % API_VERSION, headers=self.headers)
    jobs = [Job.from_dict(job_dict) for job_dict in resp_dict['jobs']]
    return jobs

  def get_job(self, job_id):
    resp_dict = self._root.get('%s/job/%d/' % (API_VERSION, job_id), headers=self.headers)
    if resp_dict['job']:
      return Job.from_dict(resp_dict['job'])
    return None

  def create_job(self, job):
    """Create a job; backfills FROM/TO/driver configs from the server defaults if missing."""
    if not job.from_config_values:
      job.from_config_values = self.get_connectors()[0].job_config['FROM']
    if not job.to_config_values:
      job.to_config_values = self.get_connectors()[0].job_config['TO']
    if not job.driver_config_values:
      job.driver_config_values = self.get_driver().job_config
    job.creation_date = int( round(time.time() * 1000) )
    job.update_date = job.creation_date
    job_dict = job.to_dict()
    request_dict = {
      'job': job_dict
    }
    resp = self._root.post('%s/job/' % API_VERSION, data=json.dumps(request_dict), headers=self.headers)

    # NOTE(review): error detection differs from create_link/update_job,
    # which scan 'validation-result'; here a missing 'id' is the signal.
    if 'id' not in resp:
      raise SqoopException.from_dicts(resp['validation-result'])

    job.id = resp['id']
    return job

  def update_job(self, job):
    """Update a job in place; backfills configs from server defaults if missing."""
    if not job.from_config_values:
      job.from_config_values = self.get_connectors()[0].job_config['FROM']
    if not job.to_config_values:
      job.to_config_values = self.get_connectors()[0].job_config['TO']
    if not job.driver_config_values:
      job.driver_config_values = self.get_driver().job_config
    job.updated = int( round(time.time() * 1000) )
    job_dict = job.to_dict()
    request_dict = {
      'job': job_dict
    }
    resp = self._root.put('%s/job/%d/' % (API_VERSION, job.id), data=json.dumps(request_dict), headers=self.headers)

    # Lame check that iterates to make sure we have an error
    # Server responds with: {'validation-result': [{},{}]} or {'validation-result': [{KEY: ERROR},{KEY: ERROR}]}
    for result in resp['validation-result']:
      if result:
        raise SqoopException.from_dicts(resp['validation-result'])

    return job

  def delete_job(self, job):
    resp_dict = self._root.delete('%s/job/%s' % (API_VERSION, job.id), headers=self.headers)
    return None

  def get_job_status(self, job):
    resp_dict = self._root.get('%s/job/%d/status' % (API_VERSION, job.id), headers=self.headers)
    return Submission.from_dict(resp_dict['submission'])

  def start_job(self, job):
    """Start the job; raises SqoopSubmissionException when the server reports a bad status."""
    resp_dict = self._root.put('%s/job/%d/start' % (API_VERSION, job.id), headers=self.headers)
    if resp_dict['submission']['status'] in SqoopClient.STATUS_BAD:
      raise SqoopSubmissionException.from_dict(resp_dict['submission'])
    return Submission.from_dict(resp_dict['submission'])

  def stop_job(self, job):
    resp_dict = self._root.put('%s/job/%d/stop' % (API_VERSION, job.id), headers=self.headers)
    return Submission.from_dict(resp_dict['submission'])

  def get_submissions(self):
    resp_dict = self._root.get('%s/submissions' % API_VERSION, headers=self.headers)
    submissions = [Submission.from_dict(submission_dict) for submission_dict in resp_dict['submissions']]
    return submissions

  def set_user(self, user):
    self._user = user

  def set_language(self, language):
    self._language = language