class ResourceManagerApi(object):
  """Thin REST wrapper around the YARN Resource Manager web service."""

  def __init__(self, oozie_url, security_enabled=False):
    self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if self._security_enabled:
      self._client.set_kerberos_auth()

  def __str__(self):
    return "ResourceManagerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def apps(self, **kwargs):
    # JSON is requested explicitly; the RM may otherwise answer with XML.
    return self._root.get('cluster/apps', params=kwargs, headers={'Accept': _JSON_CONTENT_TYPE})

  def app(self, app_id):
    return self._root.get('cluster/apps/%s' % (app_id,), headers={'Accept': _JSON_CONTENT_TYPE})
class ResourceManagerApi(object):
  """REST client for the YARN Resource Manager web service.

  Args:
    oozie_url: base URL of the Resource Manager.
    security_enabled: use Kerberos (SPNEGO) authentication when True.
    ssl_cert_ca_verify: whether to verify the server SSL certificate.
  """

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    self._ssl_cert_ca_verify = ssl_cert_ca_verify

    if self._security_enabled:
      self._client.set_kerberos_auth()

    # Bug fix: always propagate the flag to the client. The previous code
    # only called set_verify(True) when the flag was set, so certificate
    # verification could never be explicitly disabled.
    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "ResourceManagerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def cluster(self, **kwargs):
    return self._root.get('cluster', params=kwargs, headers={'Accept': _JSON_CONTENT_TYPE})

  def apps(self, **kwargs):
    return self._root.get('cluster/apps', params=kwargs, headers={'Accept': _JSON_CONTENT_TYPE})

  def app(self, app_id):
    return self._root.get('cluster/apps/%(app_id)s' % {'app_id': app_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def kill(self, app_id):
    # The RM state API transitions the application; KILLED is terminal.
    return self._root.put('cluster/apps/%(app_id)s/state' % {'app_id': app_id}, data=json.dumps({'state': 'KILLED'}), contenttype=_JSON_CONTENT_TYPE)
class ResourceManagerApi(object):
  # NOTE(review): despite the class name, this wraps Node Manager container
  # endpoints ('node/containers', and __str__ reports "NodeManagerApi") —
  # confirm intended name against the importing module before renaming.

  def __init__(self, oozie_url, security_enabled=False):
    self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if self._security_enabled:
      self._client.set_kerberos_auth()

  def __str__(self):
    return "NodeManagerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def containers(self):
    return self._root.get('node/containers', headers={'Accept': _JSON_CONTENT_TYPE})

  def container(self, container_id):
    return self._root.get('node/containers/%s' % (container_id,), headers={'Accept': _JSON_CONTENT_TYPE})
class ResourceManagerApi(object):
  """Client for the Node Manager container REST endpoints."""

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=True):
    self._url = posixpath.join(oozie_url, "ws", _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "NodeManagerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def containers(self):
    return self._root.get("node/containers", headers={"Accept": _JSON_CONTENT_TYPE})

  def container(self, container_id):
    path = "node/containers/%(container_id)s" % {"container_id": container_id}
    return self._root.get(path, headers={"Accept": _JSON_CONTENT_TYPE})
class JobServerApi(object):
  """REST client for a Spark Job Server instance (jobs / contexts / jars)."""

  def __init__(self, oozie_url):
    self._url = posixpath.join(oozie_url)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = False
    # To store user info (per serving thread).
    self._thread_local = threading.local()

  def __str__(self):
    return "JobServerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  @property
  def user(self):
    return self._thread_local.user

  def setuser(self, user):
    # Accept either a User object or a plain username string.
    if hasattr(user, "username"):
      self._thread_local.user = user.username
    else:
      self._thread_local.user = user

  def get_status(self, **kwargs):
    return self._root.get("healthz", params=kwargs, headers={"Accept": _TEXT_CONTENT_TYPE})

  def submit_job(self, appName, classPath, data, context=None, sync=False):
    params = {"appName": appName, "classPath": classPath, "sync": sync}
    if context:
      params["context"] = context
    # Bug fix: the path was built as "jobs" % params — a no-op %-format on a
    # literal with no conversion specifiers that only worked by accident.
    # The parameters belong in the query string only.
    return self._root.post("jobs", params=params, data=data, contenttype=_BINARY_CONTENT_TYPE)

  def job(self, job_id):
    return self._root.get("jobs/%s" % job_id, headers={"Accept": _JSON_CONTENT_TYPE})

  def jobs(self, **kwargs):
    return self._root.get("jobs", params=kwargs, headers={"Accept": _JSON_CONTENT_TYPE})

  def create_context(self, name, **kwargs):
    return self._root.post("contexts/%s" % name, params=kwargs, contenttype=_BINARY_CONTENT_TYPE)

  def contexts(self, **kwargs):
    return self._root.get("contexts", params=kwargs, headers={"Accept": _JSON_CONTENT_TYPE})

  def delete_context(self, name, **kwargs):
    return self._root.delete("contexts/%s" % name)

  def upload_jar(self, app_name, data):
    return self._root.post("jars/%s" % app_name, data=data, contenttype=_BINARY_CONTENT_TYPE)

  def jars(self, **kwargs):
    return self._root.get("jars", params=kwargs, headers={"Accept": _JSON_CONTENT_TYPE})
class JobServerApi(object):
  """REST client for a Livy-style session server (sessions / statements)."""

  def __init__(self, oozie_url):
    self._url = posixpath.join(oozie_url)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = False
    # Per-thread storage for the effective username.
    self._thread_local = threading.local()

  def __str__(self):
    return "JobServerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  @property
  def user(self):
    return self._thread_local.user

  def setuser(self, user):
    # Accept a User object or a bare username string.
    username = user.username if hasattr(user, 'username') else user
    self._thread_local.user = username

  def get_status(self):
    return self._root.get('sessions')

  def create_session(self, **kwargs):
    return self._root.post('sessions', data=json.dumps(kwargs), contenttype='application/json')

  def get_session(self, uuid):
    return self._root.get('sessions/%s' % uuid)

  def submit_statement(self, uuid, statement):
    payload = {'code': statement}
    return self._root.post('sessions/%s/statements' % uuid, data=json.dumps(payload), contenttype=_JSON_CONTENT_TYPE)

  def inspect(self, uuid, statement):
    payload = {'code': statement}
    return self._root.post('sessions/%s/inspect' % uuid, data=json.dumps(payload), contenttype=_JSON_CONTENT_TYPE)

  def fetch_data(self, session, statement):
    return self._root.get('sessions/%s/statements/%s' % (session, statement))

  def cancel(self, session):
    return self._root.post('sessions/%s/interrupt' % session)
class MapreduceApi(object):
  """Client for the MapReduce Application Master REST API, reached through
  the Resource Manager proxy ('proxy/<application id>/ws/...')."""

  def __init__(self, oozie_url, security_enabled=False):
    self._url = posixpath.join(oozie_url, 'proxy')
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if self._security_enabled:
      self._client.set_kerberos_auth()

  def __str__(self):
    return "MapreduceApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  def job(self, user, job_id):
    # The RM proxy is addressed by application id, the MR API by job id.
    app_id = job_id.replace('job', 'application')
    return self._root.get('%s/ws/%s/mapreduce/jobs/%s' % (app_id, _API_VERSION, job_id), headers={'Accept': _JSON_CONTENT_TYPE})

  def counters(self, job_id):
    app_id = job_id.replace('job', 'application')
    return self._root.get('%s/ws/%s/mapreduce/jobs/%s/counters' % (app_id, _API_VERSION, job_id), headers={'Accept': _JSON_CONTENT_TYPE})

  def tasks(self, job_id):
    app_id = job_id.replace('job', 'application')
    return self._root.get('%s/ws/%s/mapreduce/jobs/%s/tasks' % (app_id, _API_VERSION, job_id), headers={'Accept': _JSON_CONTENT_TYPE})

  def job_attempts(self, job_id):
    app_id = job_id.replace('job', 'application')
    return self._root.get('%s/ws/%s/mapreduce/jobs/%s/jobattempts' % (app_id, _API_VERSION, job_id), headers={'Accept': _JSON_CONTENT_TYPE})

  def conf(self, job_id):
    app_id = job_id.replace('job', 'application')
    return self._root.get('%s/ws/%s/mapreduce/jobs/%s/conf' % (app_id, _API_VERSION, job_id), headers={'Accept': _JSON_CONTENT_TYPE})

  def task(self, job_id, task_id):
    app_id = job_id.replace('job', 'application')
    return self._root.get('%s/ws/%s/mapreduce/jobs/%s/tasks/%s' % (app_id, _API_VERSION, job_id, task_id), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_counters(self, job_id, task_id):
    app_id = job_id.replace('job', 'application')
    # Normalizes a caller-supplied application id back to a job id;
    # a no-op when job_id already starts with 'job_'.
    job_id = job_id.replace('application', 'job')
    return self._root.get('%s/ws/%s/mapreduce/jobs/%s/tasks/%s/counters' % (app_id, _API_VERSION, job_id, task_id), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempts(self, job_id, task_id):
    app_id = job_id.replace('job', 'application')
    return self._root.get('%s/ws/%s/mapreduce/jobs/%s/tasks/%s/attempts' % (app_id, _API_VERSION, job_id, task_id), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempt(self, job_id, task_id, attempt_id):
    app_id = job_id.replace('job', 'application')
    job_id = job_id.replace('application', 'job')
    return self._root.get('%s/ws/%s/mapreduce/jobs/%s/tasks/%s/attempts/%s' % (app_id, _API_VERSION, job_id, task_id, attempt_id), headers={'Accept': _JSON_CONTENT_TYPE})
class HistoryServerApi(object):
  """Client for the MapReduce Job History Server REST API."""

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(oozie_url, 'ws/%s/history' % _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "HistoryServerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  def job(self, user, job_id):
    return self._root.get('mapreduce/jobs/%s' % job_id, headers={'Accept': _JSON_CONTENT_TYPE})

  def counters(self, job_id):
    return self._root.get('mapreduce/jobs/%s/counters' % job_id, headers={'Accept': _JSON_CONTENT_TYPE})

  def conf(self, job_id):
    return self._root.get('mapreduce/jobs/%s/conf' % job_id, headers={'Accept': _JSON_CONTENT_TYPE})

  def job_attempts(self, job_id):
    return self._root.get('mapreduce/jobs/%s/jobattempts' % job_id, headers={'Accept': _JSON_CONTENT_TYPE})

  def tasks(self, job_id):
    return self._root.get('mapreduce/jobs/%s/tasks' % job_id, headers={'Accept': _JSON_CONTENT_TYPE})

  def task(self, job_id, task_id):
    return self._root.get('mapreduce/jobs/%s/tasks/%s' % (job_id, task_id), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempts(self, job_id, task_id):
    return self._root.get('mapreduce/jobs/%s/tasks/%s/attempts' % (job_id, task_id), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_counters(self, job_id, task_id):
    # Normalizes a caller-supplied application id back to a job id;
    # a no-op when job_id already starts with 'job_'.
    job_id = job_id.replace('application', 'job')
    return self._root.get('mapreduce/jobs/%s/tasks/%s/counters' % (job_id, task_id), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempt(self, job_id, task_id, attempt_id):
    return self._root.get('mapreduce/jobs/%s/tasks/%s/attempts/%s' % (job_id, task_id, attempt_id), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempt_counters(self, job_id, task_id, attempt_id):
    return self._root.get('mapreduce/jobs/%s/tasks/%s/attempts/%s/counters' % (job_id, task_id, attempt_id), headers={'Accept': _JSON_CONTENT_TYPE})
class SolrApi(object):
  """
  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """
  def __init__(self, solr_url):
    self._url = solr_url
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)

  def query(self, solr_query, hue_core):
    """Run a select query against the collection named in solr_query.

    Returns the decoded JSON response; raises PopupException on REST errors.
    """
    try:
      params = (
          ('q', solr_query['q'] or EMPTY_QUERY.get()),
          ('wt', 'json'),
          ('rows', solr_query['rows']),
          ('start', solr_query['start']),
      )
      params += hue_core.get_query()

      # 'fq' arrives as a |-separated list; each non-empty facet becomes
      # its own fq parameter.
      fqs = solr_query['fq'].split('|')
      for fq in fqs:
        if fq:
          params += (('fq', fq),)

      response = self._root.get('%(collection)s/select' % solr_query, params)
      return json.loads(response)
    # Py3 compatibility fix: 'except E, e' is Python 2-only syntax.
    except RestException as e:
      raise PopupException('Error while accessing Solr: %s' % e)
class SolrApi(object):
  """
  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """
  def __init__(self, solr_url):
    self._url = solr_url
    self._client = HttpClient(self._url, logger=LOG)
    if SECURITY_ENABLED.get():
      self._client.set_kerberos_auth()
    self._root = Resource(self._client)

  def query(self, solr_query, hue_core):
    """Run a select query against the collection named in solr_query.

    Returns a dict; raises PopupException on REST errors.
    """
    try:
      params = (
          ('q', solr_query['q'] or EMPTY_QUERY.get()),
          ('wt', 'json'),
          ('rows', solr_query['rows']),
          ('start', solr_query['start']),
      )
      params += hue_core.get_query(solr_query)

      # 'fq' arrives as a |-separated list; each non-empty facet becomes
      # its own fq parameter.
      fqs = solr_query['fq'].split('|')
      for fq in fqs:
        if fq:
          params += (('fq', fq),)

      response = self._root.get('%(collection)s/select' % solr_query, params)

      if type(response) != dict:
        # Got 'plain/text' mimetype instead of 'application/json'
        response = json.loads(response)
      return response
    # Py3 compatibility fix: 'except E, e' is Python 2-only syntax.
    except RestException as e:
      raise PopupException('Error while accessing Solr: %s' % e)
def get_task_log(self, offset=0):
  """Fetch stdout/stderr/syslog for this task attempt from the logs UI.

  Returns a list of three log strings, in (stdout, stderr, syslog) order;
  an entry holds an error message when retrieval fails.
  """
  logs = []
  attempt = self.task.job.job_attempts["jobAttempt"][-1]
  log_link = attempt["logsLink"]

  # Get MR task logs
  if self.assignedContainerId:
    log_link = log_link.replace(attempt["containerId"], self.assignedContainerId)
  if hasattr(self, "nodeHttpAddress"):
    log_link = log_link.replace(attempt["nodeHttpAddress"].split(":")[0], self.nodeHttpAddress.split(":")[0])

  for name in ("stdout", "stderr", "syslog"):
    link = "/%s/" % name
    params = {}
    if int(offset) >= 0:
      params["start"] = offset

    try:
      log_link = re.sub("job_[^/]+", self.id, log_link)
      root = Resource(get_log_client(log_link), urlparse.urlsplit(log_link)[2], urlencode=False)
      response = root.get(link, params=params)
      log = html.fromstring(response).xpath("/html/body/table/tbody/tr/td[2]")[0].text_content()
    # Py3 compatibility fix: 'except E, e' is Python 2-only syntax.
    except Exception as e:
      log = _("Failed to retrieve log: %s") % e

    logs.append(log)

  # Bug fix: the collected logs were built but never returned.
  return logs
def get_task_log(self, offset=0):
  """Fetch stdout/stderr/syslog for this task attempt from the logs UI.

  Returns a list of three log strings, in (stdout, stderr, syslog) order;
  an entry holds an error message when retrieval fails.
  """
  logs = []
  attempt = self.task.job.job_attempts['jobAttempt'][-1]
  log_link = attempt['logsLink']

  # Get MR task logs
  if self.assignedContainerId:
    log_link = log_link.replace(attempt['containerId'], self.assignedContainerId)
  if hasattr(self, 'nodeHttpAddress'):
    log_link = log_link.replace(attempt['nodeHttpAddress'].split(':')[0], self.nodeHttpAddress.split(':')[0])

  for name in ('stdout', 'stderr', 'syslog'):
    link = '/%s/' % name
    params = {}
    if int(offset) >= 0:
      params['start'] = offset

    try:
      log_link = re.sub('job_[^/]+', self.id, log_link)
      root = Resource(get_log_client(log_link), urlparse.urlsplit(log_link)[2], urlencode=False)
      response = root.get(link, params=params)
      log = html.fromstring(response).xpath('/html/body/table/tbody/tr/td[2]')[0].text_content()
    # Py3 compatibility fix: 'except E, e' is Python 2-only syntax.
    except Exception as e:
      log = _('Failed to retrieve log: %s') % e

    logs.append(log)

  # Bug fix: the collected logs were built but never returned.
  return logs
class SolrApi(object):
  """
  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """
  def __init__(self, solr_url):
    self._url = solr_url
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)

  def query(self, solr_query, hue_core):
    """Run a select query against the collection named in solr_query.

    Returns a dict; raises PopupException on REST errors.
    """
    try:
      params = (
          ("q", solr_query["q"] or EMPTY_QUERY.get()),
          ("wt", "json"),
          ("rows", solr_query["rows"]),
          ("start", solr_query["start"]),
      )
      params += hue_core.get_query(solr_query)

      # 'fq' arrives as a |-separated list; each non-empty facet becomes
      # its own fq parameter.
      fqs = solr_query["fq"].split("|")
      for fq in fqs:
        if fq:
          params += (("fq", fq),)

      response = self._root.get("%(collection)s/select" % solr_query, params)

      if type(response) != dict:
        # Got 'plain/text' mimetype instead of 'application/json'
        response = json.loads(response)
      return response
    # Py3 compatibility fix: 'except E, e' is Python 2-only syntax.
    except RestException as e:
      raise PopupException("Error while accessing Solr: %s" % e)
class SparkHistoryServerApi(object):
  """Client for the Spark History Server REST API."""

  def __init__(self, spark_hs_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._ui_url = spark_hs_url
    self._url = posixpath.join(spark_hs_url, 'api/%s/' % _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "Spark History Server API at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def ui_url(self):
    return self._ui_url

  @property
  def headers(self):
    # All endpoints request JSON explicitly.
    return {'Accept': _JSON_CONTENT_TYPE}

  def applications(self):
    return self._root.get('applications', headers=self.headers)

  def application(self, app_id):
    return self._root.get('applications/%s' % (app_id,), headers=self.headers)

  def jobs(self, app_id, attempt_id):
    return self._root.get('applications/%s/%s/jobs' % (app_id, attempt_id), headers=self.headers)

  def stages(self, app_id, attempt_id):
    return self._root.get('applications/%s/%s/stages' % (app_id, attempt_id), headers=self.headers)

  def executors(self, app_id, attempt_id):
    return self._root.get('applications/%s/%s/executors' % (app_id, attempt_id), headers=self.headers)
class ImpalaDaemonApi(object):
  """Client for the Impala daemon debug web UI (JSON endpoints)."""

  def __init__(self, server_url):
    self._url = server_url
    self._client = HttpClient(self._url, logger=LOG)
    # You can set username/password for Impala Web UI which overrides kerberos
    if DAEMON_API_USERNAME.get() is not None and DAEMON_API_PASSWORD.get() is not None:
      self._client.set_digest_auth(DAEMON_API_USERNAME.get(), DAEMON_API_PASSWORD.get())

    self._root = Resource(self._client)
    self._security_enabled = False
    # Per-thread storage for the effective username.
    self._thread_local = threading.local()

  def __str__(self):
    return "ImpalaDaemonApi at %s" % self._url

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  @property
  def user(self):
    return self._thread_local.user

  def set_user(self, user):
    # Accept either a User object or a plain username string.
    if hasattr(user, 'username'):
      self._thread_local.user = user.username
    else:
      self._thread_local.user = user

  def get_queries(self):
    params = {
      'json': 'true'
    }

    resp = self._root.get('queries', params=params)
    try:
      # NOTE(review): 'basestring' is Python 2-only; a py3 port needs 'str'
      # here — left unchanged to preserve py2 str/unicode handling.
      if isinstance(resp, basestring):
        return json.loads(resp)
      else:
        return resp
    # Py3 compatibility fix: 'except E, e' is Python 2-only syntax.
    except ValueError as e:
      raise ImpalaDaemonApiException('ImpalaDaemonApi did not return valid JSON: %s' % e)
def get_log_list(self):
  """Return the log names linked from the container log index page.

  Fetches the log index page and extracts, for each linked log URL, the
  second-to-last path segment (the log name), or '' when a link cannot be
  parsed. Returns [] when no log link is available.
  """
  log_link, user = self.get_log_link()
  if not log_link:
    return []

  params = {
    'doAs': user
  }
  log_link = re.sub('job_[^/]+', str(self.id), log_link)

  root = Resource(get_log_client(log_link), urlparse.urlsplit(log_link)[2], urlencode=False)
  response = root.get('/', params=params)
  links = html.fromstring(response, parser=html.HTMLParser()).xpath('/html/body/table/tbody/tr/td[2]//a/@href')

  # Fix: use list comprehensions instead of map()/lambda so the function
  # keeps returning a list under Python 3 (where map() is a lazy iterator).
  parsed_links = [urlparse.urlsplit(link) for link in links]
  return [p[2].split('/')[-2] if p and len(p) >= 2 else '' for p in parsed_links]
class ImpalaDaemonApi(object):
  """Client for the Impala daemon debug web UI (query profile endpoint)."""

  def __init__(self, server_url):
    self._url = server_url
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = False
    # Per-thread storage for the effective username.
    self._thread_local = threading.local()

  def __str__(self):
    return "ImpalaDaemonApi at %s" % self._url

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  @property
  def user(self):
    return self._thread_local.user

  def set_user(self, user):
    # Accept either a User object or a plain username string.
    if hasattr(user, 'username'):
      self._thread_local.user = user.username
    else:
      self._thread_local.user = user

  def get_query_profile(self, query_id):
    params = {
      'query_id': query_id,
      'json': 'true'
    }
    profile = None

    resp = self._root.get('query_profile', params=params)
    try:
      profile = json.loads(resp)
    # Py3 compatibility fix: 'except E, e' was Python 2-only syntax; the
    # bound exception was unused, so the binding is dropped.
    except ValueError:
      raise ImpalaDaemonApiException('ImpalaDaemonApi query_profile did not return valid JSON.')

    return profile
class SolrApi(object):
  """
  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """
  def __init__(self, solr_url, user):
    self._url = solr_url
    self._user = user
    self._client = HttpClient(self._url, logger=LOG)
    self.security_enabled = SECURITY_ENABLED.get()

    if self.security_enabled:
      self._client.set_kerberos_auth()

    self._root = Resource(self._client)

  def _get_params(self):
    # With Kerberos the effective user comes solely from doAs; otherwise a
    # proxy user.name is supplied as well.
    if self.security_enabled:
      return (('doAs', self._user),)
    return (('user.name', DEFAULT_USER), ('doAs', self._user),)

  def query(self, solr_query, hue_core):
    """Run a select query against the collection named in solr_query.

    Returns a dict; raises PopupException on REST errors.
    """
    try:
      params = self._get_params() + (
          ('q', solr_query['q'] or EMPTY_QUERY.get()),
          ('wt', 'json'),
          ('rows', solr_query['rows']),
          ('start', solr_query['start']),
      )
      params += hue_core.get_query(solr_query)

      # 'fq' arrives as a |-separated list; each non-empty facet becomes
      # its own fq parameter.
      fqs = solr_query['fq'].split('|')
      for fq in fqs:
        if fq:
          params += (('fq', fq),)

      response = self._root.get('%(collection)s/select' % solr_query, params)

      if type(response) != dict:
        # Got 'plain/text' mimetype instead of 'application/json'
        try:
          response = json.loads(response)
        # Py3 compatibility fix: 'except E, e' is Python 2-only syntax.
        except ValueError as e:
          # Got some null bytes in the response
          # NOTE(review): unicode() is Python 2-only; a py3 port needs str()
          # here — left unchanged to preserve py2 unicode handling.
          LOG.error('%s: %s' % (unicode(e), repr(response)))
          response = json.loads(response.replace('\x00', ''))
      return response
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))
def get_task_log(self, offset=0):
  """Fetch the stdout, stderr and syslog snippets for this attempt.

  Returns a list of three strings, in (stdout, stderr, syslog) order; an
  entry holds an error message when retrieval fails.
  """
  logs = []

  log_link, user = self.get_log_link()
  if not log_link:
    return ['', '', '']

  for name in ('stdout', 'stderr', 'syslog'):
    link = '/%s/' % name

    if self.type == 'Oozie Launcher' and not self.task.job.status == 'FINISHED': # Yarn currently dumps with 500 error with doas in running state
      params = {}
    else:
      params = {
        'doAs': user
      }
    if int(offset) != 0:
      params['start'] = offset
    else:
      params['start'] = 0

    response = None
    try:
      log_link = re.sub('job_[^/]+', str(self.id), log_link)
      root = Resource(get_log_client(log_link), urlparse.urlsplit(log_link)[2], urlencode=False)
      response = root.get(link, params=params)
      log = html.fromstring(response, parser=html.HTMLParser()).xpath('/html/body/table/tbody/tr/td[2]')[0].text_content()
    # Py3 compatibility fix: 'except E, e' is Python 2-only syntax.
    except Exception as e:
      log = _('Failed to retrieve log: %s' % e)
      try:
        debug_info = '\nLog Link: %s' % log_link
        if response:
          debug_info += '\nHTML Response: %s' % response
        LOG.error(debug_info)
      # Narrowed from a bare 'except:' which would also swallow
      # KeyboardInterrupt/SystemExit.
      except Exception:
        LOG.exception('failed to build debug info')

    logs.append(log)

  # Bug fix: the collected logs were built but never returned.
  return logs
def get_task_log(self, offset=0):
  """Fetch stdout/stderr/syslog for this MR task attempt.

  Returns a list of three log strings, in (stdout, stderr, syslog) order;
  an entry holds an error message when retrieval fails.
  """
  logs = []
  attempt = self.task.job.job_attempts['jobAttempt'][-1]
  log_link = attempt['logsLink']

  # Get MR task logs
  # Don't hack up the urls if they've been migrated to the job history server.
  # Py3 compatibility fix: .itervalues() is Python 2-only; .values() iterates
  # the same on both versions.
  for cluster in YARN_CLUSTERS.get().values():
    if log_link.startswith(cluster.HISTORY_SERVER_API_URL.get()):
      break
  else:
    if self.assignedContainerId:
      log_link = log_link.replace(attempt['containerId'], self.assignedContainerId)
    if hasattr(self, 'nodeHttpAddress'):
      log_link = log_link.replace(attempt['nodeHttpAddress'].split(':')[0], self.nodeHttpAddress.split(':')[0])

  for name in ('stdout', 'stderr', 'syslog'):
    link = '/%s/' % name
    params = {}
    if int(offset) >= 0:
      params['start'] = offset

    # Bug fix: 'response' was unbound in the error path when root.get()
    # raised, so the debug dump below hit a NameError.
    response = None
    try:
      log_link = re.sub('job_[^/]+', self.id, log_link)
      root = Resource(get_log_client(log_link), urlparse.urlsplit(log_link)[2], urlencode=False)
      response = root.get(link, params=params)
      log = html.fromstring(response, parser=html.HTMLParser()).xpath('/html/body/table/tbody/tr/td[2]')[0].text_content()
    # Py3 compatibility fix: 'except E, e' is Python 2-only syntax.
    except Exception as e:
      log = _('Failed to retrieve log: %s' % e)
      try:
        debug_info = '\nLog Link: %s' % log_link
        if response:
          debug_info += '\nHTML Response: %s' % response
        # NOTE(review): this used LOGGER while the fallback below uses LOG —
        # confirm which logger exists at module level; left unchanged.
        LOGGER.error(debug_info)
      # Narrowed from a bare 'except:'.
      except Exception:
        LOG.exception('failed to build debug info')

    logs.append(log)

  # Bug fix: the collected logs were built but never returned.
  return logs
class FlinkSqlClient():
  '''
  Implements https://github.com/ververica/flink-sql-gateway
  Could be a pip module or sqlalchemy dialect in the future.
  '''

  def __init__(self, user, api_url):
    self.user = user
    self._url = posixpath.join(api_url + '/' + _API_VERSION + '/')
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)

  def __str__(self):
    return "FlinkClient at %s" % (self._url,)

  def info(self):
    return self._root.get('info')

  def create_session(self, **properties):
    # Defaults mirror the gateway's expected payload; any keyword argument
    # overrides the matching key.
    data = {
      "session_name": "test", # optional
      "planner": "blink", # required, "old"/"blink"
      "execution_type": "streaming", # required, "batch"/"streaming"
      "properties": { # optional
        "key": "value"
      }
    }
    data.update(properties)

    return self._root.post('sessions', data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE)

  def session_heartbeat(self, session_id):
    return self._root.post('sessions/%s/heartbeat' % (session_id,))

  def execute_statement(self, session_id, statement):
    data = {
      "statement": statement, # required
      "execution_timeout": "" # execution time limit in milliseconds, optional, but required for stream SELECT ?
    }

    return self._root.post('sessions/%s/statements' % (session_id,), data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE)

  def fetch_status(self, session_id, job_id):
    return self._root.get('sessions/%s/jobs/%s/status' % (session_id, job_id))

  def fetch_results(self, session_id, job_id, token=0):
    return self._root.get('sessions/%s/jobs/%s/result/%s' % (session_id, job_id, token))

  def close_statement(self, session_id, job_id):
    return self._root.delete('sessions/%s/jobs/%s' % (session_id, job_id))

  def close_session(self, session_id):
    return self._root.delete('sessions/%s' % (session_id,))
class ResourceManagerApi(object):
  """Resource Manager REST client that detects standby-RM responses.

  Every request funnels through _execute() so that a standby RM answering
  in place of the active one surfaces as YarnFailoverOccurred.
  """

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "ResourceManagerApi at %s" % (self._url, )

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  # Bug fix for all four endpoint methods below: each had a plain
  # 'return self._root....' statement before the _execute() call, which made
  # the failover detection unreachable dead code. The early returns are
  # removed so every request goes through _execute().

  def cluster(self, **kwargs):
    return self._execute(self._root.get, 'cluster', params=kwargs, headers={'Accept': _JSON_CONTENT_TYPE})

  def apps(self, **kwargs):
    return self._execute(self._root.get, 'cluster/apps', params=kwargs, headers={'Accept': _JSON_CONTENT_TYPE})

  def app(self, app_id):
    return self._execute(self._root.get, 'cluster/apps/%(app_id)s' % {'app_id': app_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def kill(self, app_id):
    return self._execute(self._root.put, 'cluster/apps/%(app_id)s/state' % {'app_id': app_id}, data=json.dumps({'state': 'KILLED'}), contenttype=_JSON_CONTENT_TYPE)

  def _execute(self, function, *args, **kwargs):
    response = function(*args, **kwargs)

    # YARN-2605: Yarn does not use proper HTTP redirects when the standby RM has
    # failed back to the master RM.
    if isinstance(response, str) and response.startswith('This is standby RM. Redirecting to the current active RM'):
      raise YarnFailoverOccurred(response)

    return response
def job_attempt_logs_json(request, job, attempt_index=0, name='syslog', offset=LOG_OFFSET_BYTES, is_embeddable=False):
  """For async log retrieval as Yarn servers are very slow"""
  log_link = None
  response = {'status': -1}

  try:
    jt = get_api(request.user, request.jt)
    app = jt.get_application(job.jobId)

    if app['applicationType'] == 'MAPREDUCE':
      if app['finalStatus'] in ('SUCCEEDED', 'FAILED', 'KILLED'):
        attempt_index = int(attempt_index)
        if not job.job_attempts['jobAttempt']:
          response = {'status': 0, 'log': _('Job has no tasks')}
        else:
          attempt = job.job_attempts['jobAttempt'][attempt_index]

          log_link = attempt['logsLink']
          # Reformat log link to use YARN RM, replace node addr with node ID addr
          log_link = log_link.replace(attempt['nodeHttpAddress'], attempt['nodeId'])
      elif app['state'] == 'RUNNING':
        log_link = app['amContainerLogs']
    elif app.get('amContainerLogs'):
      log_link = app.get('amContainerLogs')
  except (KeyError, RestException) as e:
    raise KeyError(_("Cannot find job attempt '%(id)s'.") % {'id': job.jobId}, e)
  except Exception as e:
    raise Exception(_("Failed to get application for job %s: %s") % (job.jobId, e))

  if log_link:
    link = '/%s/' % name
    params = {
      'doAs': request.user.username
    }
    if offset != 0:
      params['start'] = offset

    root = Resource(get_log_client(log_link), urllib.parse.urlsplit(log_link)[2], urlencode=False)
    api_resp = None

    try:
      api_resp = root.get(link, params=params)
      log = html.fromstring(api_resp, parser=html.HTMLParser()).xpath('/html/body/table/tbody/tr/td[2]')[0].text_content()

      response['status'] = 0
      response['log'] = LinkJobLogs._make_hdfs_links(log, is_embeddable)
    except Exception as e:
      response['log'] = _('Failed to retrieve log: %s' % e)
      try:
        debug_info = '\nLog Link: %s' % log_link
        if api_resp:
          # Bug fix: the debug dump appended the JSON status dict being built
          # ('response') instead of the fetched HTML ('api_resp').
          debug_info += '\nHTML Response: %s' % api_resp
        response['debug'] = debug_info
        LOG.error(debug_info)
      # Narrowed from a bare 'except:' which would also swallow
      # KeyboardInterrupt/SystemExit.
      except Exception:
        LOG.exception('failed to create debug info')

  return JsonResponse(response)
class ManagerApi(object):
  """Client for the Cloudera Manager REST API.

  https://cloudera.github.io/cm_api/

  Authenticates with Kerberos when `security_enabled`, otherwise with the
  Navigator basic-auth credentials. All failures of the underlying REST
  layer surface as ManagerApiException where callers expect it.
  """

  def __init__(self, user=None, security_enabled=False, ssl_cert_ca_verify=False):
    self._api_url = '%s/%s' % (MANAGER.API_URL.get().strip('/'), VERSION)
    self._username = get_navigator_auth_username()
    self._password = get_navigator_auth_password()
    self.user = user
    self._client = HttpClient(self._api_url, logger=LOG)

    if security_enabled:
      self._client.set_kerberos_auth()
    else:
      self._client.set_basic_auth(self._username, self._password)

    self._client.set_verify(ssl_cert_ca_verify)
    self._root = Resource(self._client)

  def has_service(self, service_name, cluster_name=None):
    """True when `service_name` is an available service type on the cluster."""
    cluster = self._get_cluster(cluster_name)
    try:
      # 'service_name' key is unused by this format string; extra dict keys are ignored by %-formatting.
      services = self._root.get('clusters/%(cluster_name)s/serviceTypes' % {
        'cluster_name': cluster['name'],
        'service_name': service_name
      })['items']
      return service_name in services
    except RestException as e:
      raise ManagerApiException(e)

  def get_spark_history_server_configs(self, cluster_name=None):
    """Locate the Spark History Server role and return (hostId, configs), or (None, None)."""
    service_name = "SPARK_ON_YARN"
    shs_role_type = "SPARK_YARN_HISTORY_SERVER"

    try:
      cluster = self._get_cluster(cluster_name)
      services = self._root.get('clusters/%(cluster_name)s/services' % {
        'cluster_name': cluster['name'],
        'service_name': service_name
      })['items']

      service_display_names = [service['displayName'] for service in services if service['type'] == service_name]

      if service_display_names:
        spark_service_display_name = service_display_names[0]

        servers = self._root.get('clusters/%(cluster_name)s/services/%(spark_service_display_name)s/roles' % {
          'cluster_name': cluster['name'],
          'spark_service_display_name': spark_service_display_name
        })['items']

        shs_server_names = [server['name'] for server in servers if server['type'] == shs_role_type]
        shs_server_name = shs_server_names[0] if shs_server_names else None
        shs_server_hostRef = [server['hostRef'] for server in servers if server['type'] == shs_role_type]
        shs_server_hostId = shs_server_hostRef[0]['hostId'] if shs_server_hostRef else None

        if shs_server_name and shs_server_hostId:
          # Full view so defaults are included in each config item.
          shs_server_configs = self._root.get('clusters/%(cluster_name)s/services/%(spark_service_display_name)s/roles/%(shs_server_name)s/config' % {
            'cluster_name': cluster['name'],
            'spark_service_display_name': spark_service_display_name,
            'shs_server_name': shs_server_name
          }, params={'view': 'full'})['items']
          return shs_server_hostId, shs_server_configs
    except Exception as e:
      # Best-effort: any failure just means "not found".
      LOG.warning("Check Spark History Server via ManagerApi: %s" % e)

    return None, None

  def get_spark_history_server_url(self, cluster_name=None):
    """Assemble the SHS UI url from its host and port/SSL configs, or None."""
    shs_server_hostId, shs_server_configs = self.get_spark_history_server_configs(cluster_name=cluster_name)

    if shs_server_hostId and shs_server_configs:
      shs_ui_port = None
      shs_ssl_port = None
      shs_ssl_enabled = None
      for config in shs_server_configs:
        if 'relatedName' in config and 'default' in config:
          if config['relatedName'] == 'spark.history.ui.port':
            shs_ui_port = config['default']
          if config['relatedName'] == 'spark.ssl.historyServer.port':
            shs_ssl_port = config['default']
          if config['relatedName'] == 'spark.ssl.historyServer.enabled':
            shs_ssl_enabled = config['default']
      shs_ui_host = self._root.get('hosts/%(hostId)s' % {'hostId': shs_server_hostId})
      shs_ui_hostname = shs_ui_host['hostname'] if shs_ui_host else None

      return self.assemble_shs_url(shs_ui_hostname, shs_ui_port, shs_ssl_port, shs_ssl_enabled)

    return None

  def get_spark_history_server_security_enabled(self, cluster_name=None):
    """True when the SHS has SPNEGO enabled (config value string 'true')."""
    shs_server_hostId, shs_server_configs = self.get_spark_history_server_configs(cluster_name=cluster_name)

    if shs_server_configs:
      for config in shs_server_configs:
        if 'relatedName' in config and 'default' in config and config['relatedName'] == 'history_server_spnego_enabled':
          shs_security_enabled = config['default']
          return shs_security_enabled and shs_security_enabled == 'true'

    return False

  def assemble_shs_url(self, shs_ui_hostname, shs_ui_port=None, shs_ssl_port=None, shs_ssl_enabled=None):
    """Build 'http(s)://host:port' for the SHS UI; None when any piece is missing.

    NOTE(review): requires ALL of ui_port, ssl_port and ssl_enabled to be
    present even for plain http — presumably intentional, confirm.
    """
    if not shs_ui_hostname or not shs_ui_port or not shs_ssl_port or not shs_ssl_enabled:
      LOG.warning("Spark conf not found!")
      return None

    protocol = 'https' if shs_ssl_enabled.lower() == 'true' else 'http'
    shs_url = '%(protocol)s://%(hostname)s:%(port)s' % {
      'protocol': protocol,
      'hostname': shs_ui_hostname,
      'port': shs_ssl_port if shs_ssl_enabled.lower() == 'true' else shs_ui_port,
    }

    return shs_url

  def tools_echo(self):
    """Connectivity smoke test against the CM `tools/echo` endpoint."""
    try:
      params = (('message', 'hello'), )

      LOG.info(params)
      return self._root.get('tools/echo', params=params)
    except RestException as e:
      raise ManagerApiException(e)

  def get_kafka_brokers(self, cluster_name=None):
    """Comma-separated 'host:9092' list of Kafka brokers."""
    try:
      hosts = self._get_hosts('KAFKA', 'KAFKA_BROKER', cluster_name=cluster_name)

      brokers_hosts = [host['hostname'] + ':9092' for host in hosts]

      return ','.join(brokers_hosts)
    except RestException as e:
      raise ManagerApiException(e)

  def get_kudu_master(self, cluster_name=None):
    """Hostname of the first KUDU_MASTER role on the cluster."""
    try:
      cluster = self._get_cluster(cluster_name)
      services = self._root.get('clusters/%(name)s/services' % cluster)['items']

      service = [service for service in services if service['type'] == 'KUDU'][0]
      master = self._get_roles(cluster['name'], service['name'], 'KUDU_MASTER')[0]

      master_host = self._root.get('hosts/%(hostId)s' % master['hostRef'])

      return master_host['hostname']
    except RestException as e:
      raise ManagerApiException(e)

  def get_kafka_topics(self, broker_host):
    """Topic listing from the broker's monitoring endpoint on port 24042."""
    try:
      client = HttpClient('http://%s:24042' % broker_host, logger=LOG)
      root = Resource(client)

      return root.get('/api/topics')
    except RestException as e:
      raise ManagerApiException(e)

  def update_flume_config(self, cluster_name, config_name, config_value):
    """Set one config on the Flume AGENT role config group via a batch PUT."""
    service = 'FLUME-1'
    cluster = self._get_cluster(cluster_name)
    roleConfigGroup = [role['roleConfigGroupRef']['roleConfigGroupName'] for role in self._get_roles(cluster['name'], service, 'AGENT')]
    data = {
      u'items': [{
        # str.replace is used instead of %-formatting because the url itself
        # contains literal '%20' escapes that would break % interpolation.
        u'url': u'/api/v8/clusters/%(cluster_name)s/services/%(service)s/roleConfigGroups/%(roleConfigGroups)s/config?message=Updated%20service%20and%20role%20type%20configurations.'.replace(
            '%(cluster_name)s', urllib_quote(cluster['name'])).replace(
            '%(service)s', service).replace(
            '%(roleConfigGroups)s', roleConfigGroup[0]),
        u'body': {
          u'items': [{
            u'name': config_name,
            u'value': config_value
          }]
        },
        u'contentType': u'application/json',
        u'method': u'PUT'
      }]
    }

    return self.batch(items=data)

  def get_flume_agents(self, cluster_name=None):
    """Hostnames of every Flume AGENT role."""
    return [host['hostname'] for host in self._get_hosts('FLUME', 'AGENT', cluster_name=cluster_name)]

  def _get_hosts(self, service_name, role_name, cluster_name=None):
    """Host records running `role_name` of `service_name` on the cluster."""
    try:
      cluster = self._get_cluster(cluster_name)
      services = self._root.get('clusters/%(name)s/services' % cluster)['items']

      service = [service for service in services if service['type'] == service_name][0]
      hosts = self._get_roles(cluster['name'], service['name'], role_name)
      hosts_ids = [host['hostRef']['hostId'] for host in hosts]

      hosts = self._root.get('hosts')['items']

      return [host for host in hosts if host['hostId'] in hosts_ids]
    except RestException as e:
      raise ManagerApiException(e)

  def refresh_flume(self, cluster_name, restart=False):
    """Refresh (or fully restart) all Flume AGENT roles."""
    service = 'FLUME-1'
    cluster = self._get_cluster(cluster_name)
    roles = [role['name'] for role in self._get_roles(cluster['name'], service, 'AGENT')]

    if restart:
      return self.restart_services(cluster['name'], service, roles)
    else:
      return self.refresh_configs(cluster['name'], service, roles)

  def refresh_configs(self, cluster_name, service=None, roles=None):
    """Refresh at cluster, service or role granularity depending on the arguments given."""
    try:
      if service is None:
        return self._root.post('clusters/%(cluster_name)s/commands/refresh' % {'cluster_name': cluster_name}, contenttype="application/json")
      elif roles is None:
        return self._root.post('clusters/%(cluster_name)s/services/%(service)s/roleCommands/refresh' % {'cluster_name': cluster_name, 'service': service}, contenttype="application/json")
      else:
        return self._root.post(
            'clusters/%(cluster_name)s/services/%(service)s/roleCommands/refresh' % {'cluster_name': cluster_name, 'service': service},
            data=json.dumps({"items": roles}),
            contenttype="application/json")
    except RestException as e:
      raise ManagerApiException(e)

  def restart_services(self, cluster_name, service=None, roles=None):
    """Restart at cluster, service or role granularity depending on the arguments given."""
    try:
      if service is None:
        return self._root.post('clusters/%(cluster_name)s/commands/restart' % {'cluster_name': cluster_name}, contenttype="application/json")
      elif roles is None:
        return self._root.post('clusters/%(cluster_name)s/services/%(service)s/roleCommands/restart' % {'cluster_name': cluster_name, 'service': service}, contenttype="application/json")
      else:
        return self._root.post(
            'clusters/%(cluster_name)s/services/%(service)s/roleCommands/restart' % {'cluster_name': cluster_name, 'service': service},
            data=json.dumps({"items": roles}),
            contenttype="application/json")
    except RestException as e:
      raise ManagerApiException(e)

  def batch(self, items):
    """Submit a CM batch request (list of url/method/body items)."""
    try:
      return self._root.post('batch', data=json.dumps(items), contenttype='application/json')
    except RestException as e:
      raise ManagerApiException(e)

  def _get_cluster(self, cluster_name=None):
    """Cluster record by name, or the first cluster when no name is given."""
    clusters = self._root.get('clusters/')['items']

    if cluster_name is not None:
      cluster = [cluster for cluster in clusters if cluster['name'] == cluster_name][0]
    else:
      cluster = clusters[0]

    return cluster

  def _get_roles(self, cluster_name, service_name, role_type):
    """Role records of `role_type` within the given service."""
    roles = self._root.get('clusters/%(cluster_name)s/services/%(service_name)s/roles' % {'cluster_name': cluster_name, 'service_name': service_name})['items']
    return [role for role in roles if role['type'] == role_type]

  def get_impalad_config(self, key=None, impalad_host=None, cluster_name=None):
    """Value of config `key` on the IMPALAD role running on `impalad_host`, or None."""
    if not key or not impalad_host:
      return None

    service_name = "IMPALA"
    role_type = 'IMPALAD'

    try:
      cluster = self._get_cluster(cluster_name)
      # 'service_name' key is unused by this format string; extras are ignored.
      services = self._root.get('clusters/%(cluster_name)s/services' % {
        'cluster_name': cluster['name'],
        'service_name': service_name
      })['items']

      service_display_names = [service['displayName'] for service in services if service['type'] == service_name]

      hosts = self._root.get('hosts')['items']
      impalad_hostIds = [host['hostId'] for host in hosts if host['hostname'] == impalad_host]

      if impalad_hostIds and service_display_names:
        impalad_hostId = impalad_hostIds[0]
        impala_service_display_name = service_display_names[0]

        # NOTE(review): dict keys say 'spark_service_display_name' but carry
        # Impala values — copied from the SHS helper above; harmless.
        servers = self._root.get('clusters/%(cluster_name)s/services/%(spark_service_display_name)s/roles' % {
          'cluster_name': cluster['name'],
          'spark_service_display_name': impala_service_display_name
        })['items']

        impalad_server_names = [server['name'] for server in servers if server['type'] == role_type and server['hostRef']['hostId'] == impalad_hostId]
        impalad_server_name = impalad_server_names[0] if impalad_server_names else None

        if impalad_server_name:
          server_configs = self._root.get('clusters/%(cluster_name)s/services/%(spark_service_display_name)s/roles/%(shs_server_name)s/config' % {
            'cluster_name': cluster['name'],
            'spark_service_display_name': impala_service_display_name,
            'shs_server_name': impalad_server_name
          }, params={'view': 'full'})['items']

          for config in server_configs:
            if 'relatedName' in config and 'value' in config:
              if config['relatedName'] == key:
                return config['value']
    except Exception as e:
      LOG.warning("Get Impala Daemon API configurations via ManangerAPI: %s" % e)

    return None
def get_task_log(self, offset=0):
  """Fetch stdout/stderr/syslog for this task attempt by scraping the NM/JHS log pages.

  The attempt's `logsLink` is rewritten depending on job state: running jobs
  read from the NodeManager container-log url, completed jobs from the
  JobHistory server. Python 2 syntax (`except X, e`).

  NOTE(review): `logs` is built but no `return logs` is visible at the end of
  this chunk — presumably truncated; confirm against the original file.
  """
  logs = []
  attempt = self.task.job.job_attempts['jobAttempt'][-1]
  log_link = attempt['logsLink']

  # Generate actual task log link from logsLink url
  if self.task.job.status in ('NEW', 'SUBMITTED', 'RUNNING'):
    logs_path = '/node/containerlogs/'
    node_url, tracking_path = log_link.split(logs_path)
    container_id, user = tracking_path.strip('/').split('/')

    # Replace log path tokens with actual container properties if available
    if hasattr(self, 'nodeHttpAddress') and 'nodeId' in attempt:
      node_url = '%s://%s' % (node_url.split('://')[0], self.nodeHttpAddress)
    container_id = self.assignedContainerId if hasattr(self, 'assignedContainerId') else container_id

    log_link = '%(node_url)s/%(logs_path)s/%(container)s/%(user)s' % {
      'node_url': node_url,
      'logs_path': logs_path.strip('/'),
      'container': container_id,
      'user': user
    }
  else:
    # Completed jobs: logs served by the JobHistory server.
    logs_path = '/jobhistory/logs/'
    root_url, tracking_path = log_link.split(logs_path)
    node_url, container_id, attempt_id, user = tracking_path.strip('/').split('/')

    # Replace log path tokens with actual attempt properties if available
    if hasattr(self, 'nodeHttpAddress') and 'nodeId' in attempt:
      node_url = '%s:%s' % (self.nodeHttpAddress.split(':')[0], attempt['nodeId'].split(':')[1])
    container_id = self.assignedContainerId if hasattr(self, 'assignedContainerId') else container_id
    attempt_id = self.attemptId if hasattr(self, 'attemptId') else attempt_id

    log_link = '%(root_url)s/%(logs_path)s/%(node)s/%(container)s/%(attempt)s/%(user)s' % {
      'root_url': root_url,
      'logs_path': logs_path.strip('/'),
      'node': node_url,
      'container': container_id,
      'attempt': attempt_id,
      'user': user
    }

  for name in ('stdout', 'stderr', 'syslog'):
    link = '/%s/' % name
    params = {}

    if int(offset) != 0:
      params['start'] = offset

    response = None
    try:
      # Point the link at this task's own job id before fetching.
      log_link = re.sub('job_[^/]+', self.id, log_link)
      root = Resource(get_log_client(log_link), urlparse.urlsplit(log_link)[2], urlencode=False)
      response = root.get(link, params=params)
      # The log text lives in the second <td> of the served HTML page.
      log = html.fromstring(response, parser=html.HTMLParser()).xpath('/html/body/table/tbody/tr/td[2]')[0].text_content()
    except Exception, e:
      log = _('Failed to retrieve log: %s' % e)
      try:
        # Best-effort debug logging; never mask the original failure.
        debug_info = '\nLog Link: %s' % log_link
        if response:
          debug_info += '\nHTML Response: %s' % response
        LOG.error(debug_info)
      except:
        LOG.exception('failed to build debug info')

    logs.append(log)
class HistoryServerApi(object):
  """Client for the MapReduce JobHistory Server REST API (`ws/<_API_VERSION>/history`).

  Looks up finished jobs, tasks and attempts. The effective user is stored
  per-thread via `setuser()` and applied as `doAs` impersonation.
  """

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    # NOTE(review): the parameter is named `oozie_url` but is used as the
    # JobHistory server base url — presumably a legacy name; confirm.
    self._url = posixpath.join(oozie_url, 'ws/%s/history' % _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    self._thread_local = threading.local()  # To store user info

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "HistoryServerApi at %s" % (self._url, )

  def _get_params(self):
    """Common query params: impersonate the current thread's user when needed."""
    params = {}

    if self.username != DEFAULT_USER.get():  # We impersonate if needed
      params['doAs'] = self.username
      if not self._security_enabled:
        params['user.name'] = DEFAULT_USER.get()

    return params

  @property
  def url(self):
    return self._url

  @property
  def user(self):
    return self.username  # Backward compatibility

  @property
  def username(self):
    # Falls back to the default user when none was set on this thread.
    try:
      return self._thread_local.user
    except AttributeError:
      return DEFAULT_USER.get()

  def setuser(self, user):
    """Set the effective user for this thread; returns the previous one."""
    curr = self.user
    self._thread_local.user = user
    return curr

  def job(self, user, job_id):
    # `user` is unused here; kept for interface compatibility with callers.
    return self._root.get('mapreduce/jobs/%(job_id)s' % {'job_id': job_id}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def counters(self, job_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/counters' % {'job_id': job_id}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def conf(self, job_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/conf' % {'job_id': job_id}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def job_attempts(self, job_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/jobattempts' % {'job_id': job_id}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def tasks(self, job_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/tasks' % {'job_id': job_id}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task(self, job_id, task_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s' % {'job_id': job_id, 'task_id': task_id}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempts(self, job_id, task_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts' % {'job_id': job_id, 'task_id': task_id}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_counters(self, job_id, task_id):
    # Accepts an application id too: normalize to the `job_...` form first.
    job_id = job_id.replace('application', 'job')
    return self._root.get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/counters' % {'job_id': job_id, 'task_id': task_id}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempt(self, job_id, task_id, attempt_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s' % {'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempt_counters(self, job_id, task_id, attempt_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s/counters' % {'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})
class SparkHistoryServerApi(object):
  """Thin REST client for the Spark History Server (`api/<_API_VERSION>/`)."""

  def __init__(self, spark_hs_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._ui_url = spark_hs_url
    self._url = posixpath.join(spark_hs_url, 'api/%s/' % _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "Spark History Server API at %s" % (self._url,)

  @property
  def url(self):
    """Base REST API url (with the `api/<version>/` suffix)."""
    return self._url

  @property
  def ui_url(self):
    """Base UI url, without the REST suffix."""
    return self._ui_url

  @property
  def headers(self):
    """Default request headers: ask for JSON responses."""
    return {'Accept': _JSON_CONTENT_TYPE}

  def applications(self):
    """List all applications known to the history server."""
    return self._root.get('applications', headers=self.headers)

  def application(self, app_id):
    """Fetch one application by id."""
    endpoint = 'applications/%(app_id)s' % {'app_id': app_id}
    return self._root.get(endpoint, headers=self.headers)

  def jobs(self, app_id):
    """List the jobs of an application."""
    endpoint = 'applications/%(app_id)s/jobs' % {'app_id': app_id}
    return self._root.get(endpoint, headers=self.headers)

  def stages(self, app_id):
    """List the stages of an application."""
    endpoint = 'applications/%(app_id)s/stages' % {'app_id': app_id}
    return self._root.get(endpoint, headers=self.headers)

  def executors(self, app_id):
    """List the executors of an application."""
    endpoint = 'applications/%(app_id)s/executors' % {'app_id': app_id}
    return self._root.get(endpoint, headers=self.headers)

  def stage_attempts(self, app_id, stage_id):
    """List all attempts of a stage."""
    endpoint = 'applications/%(app_id)s/stages/%(stage_id)s' % {'app_id': app_id, 'stage_id': stage_id}
    return self._root.get(endpoint, headers=self.headers)

  def stage_attempt(self, app_id, stage_id, stage_attempt_id):
    """Fetch one attempt of a stage."""
    endpoint = 'applications/%(app_id)s/stages/%(stage_id)s/%(stage_attempt_id)s' % {
      'app_id': app_id,
      'stage_id': stage_id,
      'stage_attempt_id': stage_attempt_id
    }
    return self._root.get(endpoint, headers=self.headers)

  def task_summary(self, app_id, stage_id, stage_attempt_id):
    """Summary metrics of the tasks in a stage attempt."""
    endpoint = 'applications/%(app_id)s/stages/%(stage_id)s/%(stage_attempt_id)s/taskSummary' % {
      'app_id': app_id,
      'stage_id': stage_id,
      'stage_attempt_id': stage_attempt_id
    }
    return self._root.get(endpoint, headers=self.headers)

  def task_list(self, app_id, stage_id, stage_attempt_id):
    """Per-task list for a stage attempt."""
    endpoint = 'applications/%(app_id)s/stages/%(stage_id)s/%(stage_attempt_id)s/taskList' % {
      'app_id': app_id,
      'stage_id': stage_id,
      'stage_attempt_id': stage_attempt_id
    }
    return self._root.get(endpoint, headers=self.headers)

  def storages(self, app_id):
    """List stored RDDs of an application."""
    endpoint = 'applications/%(app_id)s/storage/rdd' % {'app_id': app_id}
    return self._root.get(endpoint, headers=self.headers)

  def storage(self, app_id, rdd_id):
    """Fetch one stored RDD's status."""
    endpoint = 'applications/%(app_id)s/storage/rdd/%(rdd_id)s' % {'app_id': app_id, 'rdd_id': rdd_id}
    return self._root.get(endpoint, headers=self.headers)

  def download_logs(self, app_id):
    """Download the event logs of an application."""
    endpoint = 'applications/%(app_id)s/logs' % {'app_id': app_id}
    return self._root.get(endpoint, headers=self.headers)

  def download_attempt_logs(self, app_id, attempt_id):
    """Download the event logs of one application attempt."""
    endpoint = 'applications/%(app_id)s/%(attempt_id)s/logs' % {'app_id': app_id, 'attempt_id': attempt_id}
    return self._root.get(endpoint, headers=self.headers)
class MapreduceApi(object):
  """Client for a running MapReduce AM via the RM web proxy (`/proxy/<app_id>/ws/...`).

  Python 2 era (`basestring`). The effective user is stored per-thread via
  `setuser()` and applied as `doAs` impersonation.
  """

  def __init__(self, mr_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(mr_url, 'proxy')
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    self._thread_local = threading.local()  # To store user info

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "MapreduceApi at %s" % (self._url, )

  def _get_params(self):
    """Common query params: impersonate the current thread's user when needed."""
    params = {}

    if self.username != DEFAULT_USER.get():  # We impersonate if needed
      params['doAs'] = self.username
      if not self._security_enabled:
        params['user.name'] = DEFAULT_USER.get()

    return params

  @property
  def url(self):
    return self._url

  @property
  def username(self):
    # Falls back to the default user when none was set on this thread.
    try:
      return self._thread_local.user
    except AttributeError:
      return DEFAULT_USER.get()

  def setuser(self, user):
    """Set the effective user for this thread; returns the previous one."""
    curr = self.username
    self._thread_local.user = user
    return curr

  def job(self, user, job_id):
    # `user` is unused; kept for interface compatibility with callers.
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def counters(self, job_id):
    app_id = job_id.replace('job', 'application')
    response = self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/counters' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})
    # If it hits the job history server, it will return HTML.
    # Simply return None in this case because there isn't much data there.
    if isinstance(response, basestring):
      return None
    else:
      return response

  def tasks(self, job_id):
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def job_attempts(self, job_id):
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/jobattempts' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def conf(self, job_id):
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/conf' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task(self, job_id, task_id):
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s' % {'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_counters(self, job_id, task_id):
    app_id = job_id.replace('job', 'application')
    # Normalize in case an application id was passed for the job id.
    job_id = job_id.replace('application', 'job')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/counters' % {'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempts(self, job_id, task_id):
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts' % {'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempt(self, job_id, task_id, attempt_id):
    app_id = job_id.replace('job', 'application')
    job_id = job_id.replace('application', 'job')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s' % {'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempt_counters(self, job_id, task_id, attempt_id):
    app_id = job_id.replace('job', 'application')
    job_id = job_id.replace('application', 'job')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s/counters' % {'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def kill(self, job_id):
    app_id = job_id.replace('job', 'application')
    get_resource_manager(self.username).kill(app_id)  # We need to call the RM
# NOTE(review): headless fragment — the enclosing `def` (an older, Python 2
# variant of the job-attempt log view) is not visible in this chunk; the
# bare `return` below only parses inside that function. Confirm against the
# original file.
try:
  attempt_index = int(attempt_index)
  attempt = job.job_attempts['jobAttempt'][attempt_index]
  log_link = attempt['logsLink']
except (KeyError, RestException), e:
  raise KeyError(_("Cannot find job attempt '%(id)s'.") % {'id': job.jobId}, e)

link = '/%s/' % name

params = {}
if offset and int(offset) >= 0:
  params['start'] = offset

# Fetch the raw HTML log page from the node serving the logs.
root = Resource(get_log_client(log_link), urlparse.urlsplit(log_link)[2], urlencode=False)

debug_info = ''
try:
  response = root.get(link, params=params)
  # The log text lives in the second <td> of the served HTML page.
  log = html.fromstring(response, parser=html.HTMLParser()).xpath('/html/body/table/tbody/tr/td[2]')[0].text_content()
except Exception, e:
  log = _('Failed to retrieve log: %s' % e)
  try:
    # Best-effort debug logging; never mask the original failure.
    debug_info = '\nLog Link: %s' % log_link
    debug_info += '\nHTML Response: %s' % response
    LOGGER.error(debug_info)
  except:
    LOGGER.exception('failed to create debug info')

response = {'log': LinkJobLogs._make_hdfs_links(log), 'debug': debug_info}
return JsonResponse(response)
# NOTE(review): headless fragment — tail of an older Python 2 variant of
# `job_attempt_logs_json`; the leading `raise` belongs to an `except` clause
# that is not visible in this chunk. Confirm against the original file.
raise Exception(_("Failed to get application for job %s: %s") % (job.jobId, e))

if log_link:
  link = '/%s/' % name
  params = {}

  if offset != 0:
    params['start'] = offset

  # Fetch the raw HTML log page from the node serving the logs.
  root = Resource(get_log_client(log_link), urlparse.urlsplit(log_link)[2], urlencode=False)
  api_resp = None

  try:
    api_resp = root.get(link, params=params)
    # The log text lives in the second <td> of the served HTML page.
    log = html.fromstring(api_resp, parser=html.HTMLParser()).xpath('/html/body/table/tbody/tr/td[2]')[0].text_content()

    response['status'] = 0
    response['log'] = LinkJobLogs._make_hdfs_links(log)
  except Exception, e:
    response['log'] = _('Failed to retrieve log: %s' % e)
    try:
      # Best-effort debug payload; never mask the original failure.
      debug_info = '\nLog Link: %s' % log_link
      if api_resp:
        debug_info += '\nHTML Response: %s' % response
      response['debug'] = debug_info
      LOG.error(debug_info)
    except:
      LOG.exception('failed to create debug info')
class LivyClient(object):
  """REST client for a Livy server: interactive sessions, statements and batches.

  Auth/CSRF/SSL behavior is driven by the module configuration flags; the
  effective user is remembered per-thread via `setuser()`.
  """

  def __init__(self, livy_url):
    self._url = posixpath.join(livy_url)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = SECURITY_ENABLED.get()
    self._csrf_enabled = CSRF_ENABLED.get()
    self._thread_local = threading.local()

    if self.security_enabled:
      self._client.set_kerberos_auth()

    if self.csrf_enabled:
      # Livy rejects state-changing requests without this header when CSRF protection is on.
      self._client.set_headers({'X-Requested-By': 'hue'})

    self._client.set_verify(SSL_CERT_CA_VERIFY.get())

  def __str__(self):
    return "LivyClient at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  @property
  def csrf_enabled(self):
    return self._csrf_enabled

  @property
  def user(self):
    return self._thread_local.user

  def setuser(self, user):
    """Remember the effective user for this thread (User object or plain name)."""
    self._thread_local.user = getattr(user, 'username', user)

  def get_status(self):
    """Server liveness check: list the sessions."""
    return self._root.get('sessions')

  def get_log(self, uuid, startFrom=None, size=None):
    """Return the session log as one newline-joined string."""
    query = {}
    if startFrom is not None:
      query['from'] = startFrom
    if size is not None:
      query['size'] = size

    reply = self._root.get('sessions/%s/log' % uuid, params=query)
    return '\n'.join(reply['log'])

  def create_session(self, **properties):
    """Create an interactive session, impersonating the current user."""
    properties['proxyUser'] = self.user
    return self._root.post('sessions', data=json.dumps(properties), contenttype=_JSON_CONTENT_TYPE)

  def get_sessions(self):
    return self._root.get('sessions')

  def get_session(self, uuid):
    return self._root.get('sessions/%s' % uuid)

  def get_statements(self, uuid):
    return self._root.get('sessions/%s/statements' % uuid)

  def submit_statement(self, uuid, statement):
    """Run a code statement inside the session."""
    payload = json.dumps({'code': statement})
    return self._root.post('sessions/%s/statements' % uuid, data=payload, contenttype=_JSON_CONTENT_TYPE)

  def inspect(self, uuid, statement):
    """Inspect a code fragment inside the session."""
    payload = json.dumps({'code': statement})
    return self._root.post('sessions/%s/inspect' % uuid, data=payload, contenttype=_JSON_CONTENT_TYPE)

  def fetch_data(self, session, statement):
    """Fetch the result of one statement."""
    return self._root.get('sessions/%s/statements/%s' % (session, statement))

  def cancel(self, session):
    """Interrupt the session's running statement."""
    return self._root.post('sessions/%s/interrupt' % session)

  def close(self, uuid):
    """Delete the session."""
    return self._root.delete('sessions/%s' % uuid)

  def get_batches(self):
    return self._root.get('batches')

  def submit_batch(self, properties):
    """Submit a batch job, impersonating the current user."""
    properties['proxyUser'] = self.user
    return self._root.post('batches', data=json.dumps(properties), contenttype=_JSON_CONTENT_TYPE)

  def get_batch(self, uuid):
    return self._root.get('batches/%s' % uuid)

  def get_batch_status(self, uuid):
    """State string of a batch job."""
    reply = self._root.get('batches/%s/state' % uuid)
    return reply['state']

  def get_batch_log(self, uuid, startFrom=None, size=None):
    """Return the batch log as one newline-joined string."""
    query = {}
    if startFrom is not None:
      query['from'] = startFrom
    if size is not None:
      query['size'] = size

    reply = self._root.get('batches/%s/log' % uuid, params=query)
    return '\n'.join(reply['log'])

  def close_batch(self, uuid):
    """Delete the batch job."""
    return self._root.delete('batches/%s' % uuid)
class MapreduceApi(object):
  """Client for a running MapReduce AM via the RM web proxy (`/proxy/<app_id>/ws/...`).

  NOTE(review): near-duplicate of the MapreduceApi defined earlier in this
  file (this one lacks `task_attempt_counters`) — presumably different
  vintages of the same module; confirm which one is live. Python 2 era
  (`basestring`).
  """

  def __init__(self, mr_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(mr_url, 'proxy')
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    self._thread_local = threading.local()  # To store user info

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "MapreduceApi at %s" % (self._url,)

  def _get_params(self):
    """Common query params: impersonate the current thread's user when needed."""
    params = {}

    if self.username != DEFAULT_USER.get():  # We impersonate if needed
      params['doAs'] = self.username
      if not self._security_enabled:
        params['user.name'] = DEFAULT_USER.get()

    return params

  @property
  def url(self):
    return self._url

  @property
  def username(self):
    # Falls back to the default user when none was set on this thread.
    try:
      return self._thread_local.user
    except AttributeError:
      return DEFAULT_USER.get()

  def setuser(self, user):
    """Set the effective user for this thread; returns the previous one."""
    curr = self.username
    self._thread_local.user = user
    return curr

  def job(self, user, job_id):
    # `user` is unused; kept for interface compatibility with callers.
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def counters(self, job_id):
    app_id = job_id.replace('job', 'application')
    response = self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/counters' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})
    # If it hits the job history server, it will return HTML.
    # Simply return None in this case because there isn't much data there.
    if isinstance(response, basestring):
      return None
    else:
      return response

  def tasks(self, job_id):
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def job_attempts(self, job_id):
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/jobattempts' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def conf(self, job_id):
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/conf' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task(self, job_id, task_id):
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s' % {'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_counters(self, job_id, task_id):
    app_id = job_id.replace('job', 'application')
    # Normalize in case an application id was passed for the job id.
    job_id = job_id.replace('application', 'job')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/counters' % {'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempts(self, job_id, task_id):
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts' % {'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempt(self, job_id, task_id, attempt_id):
    app_id = job_id.replace('job', 'application')
    job_id = job_id.replace('application', 'job')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s' % {'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def kill(self, job_id):
    app_id = job_id.replace('job', 'application')
    get_resource_manager(self.username).kill(app_id)  # We need to call the RM
class MapreduceApi(object):
  """Client for the MapReduce application master REST API, reached through
  the ResourceManager proxy endpoint."""

  def __init__(self, oozie_url, security_enabled=False):
    self._url = posixpath.join(oozie_url, 'proxy')
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    if self._security_enabled:
      self._client.set_kerberos_auth()

  def __str__(self):
    return "MapreduceApi at %s" % (self._url, )

  @property
  def url(self):
    return self._url

  def job(self, user, job_id):
    """Fetch one job. `user` is accepted for signature parity and unused."""
    substitutions = {'app_id': job_id.replace('job', 'application'), 'job_id': job_id, 'version': _API_VERSION}
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s' % substitutions, headers={'Accept': _JSON_CONTENT_TYPE})

  def counters(self, job_id):
    """Fetch the job counters, or None when only HTML is available."""
    substitutions = {'app_id': job_id.replace('job', 'application'), 'job_id': job_id, 'version': _API_VERSION}
    response = self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/counters' % substitutions, headers={'Accept': _JSON_CONTENT_TYPE})
    # If it hits the job history server, it will return HTML.
    # Simply return None in this case because there isn't much data there.
    return None if isinstance(response, basestring) else response

  def tasks(self, job_id):
    """List the tasks of the job."""
    substitutions = {'app_id': job_id.replace('job', 'application'), 'job_id': job_id, 'version': _API_VERSION}
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks' % substitutions, headers={'Accept': _JSON_CONTENT_TYPE})

  def job_attempts(self, job_id):
    """List the attempts of the job."""
    substitutions = {'app_id': job_id.replace('job', 'application'), 'job_id': job_id, 'version': _API_VERSION}
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/jobattempts' % substitutions, headers={'Accept': _JSON_CONTENT_TYPE})

  def conf(self, job_id):
    """Fetch the job configuration."""
    substitutions = {'app_id': job_id.replace('job', 'application'), 'job_id': job_id, 'version': _API_VERSION}
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/conf' % substitutions, headers={'Accept': _JSON_CONTENT_TYPE})

  def task(self, job_id, task_id):
    """Fetch a single task."""
    substitutions = {'app_id': job_id.replace('job', 'application'), 'job_id': job_id, 'task_id': task_id, 'version': _API_VERSION}
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s' % substitutions, headers={'Accept': _JSON_CONTENT_TYPE})

  def task_counters(self, job_id, task_id):
    """Fetch the counters of one task."""
    # The job id is normalized back to its 'job_' prefix for this endpoint.
    substitutions = {'app_id': job_id.replace('job', 'application'), 'job_id': job_id.replace('application', 'job'), 'task_id': task_id, 'version': _API_VERSION}
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/counters' % substitutions, headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempts(self, job_id, task_id):
    """List the attempts of one task."""
    substitutions = {'app_id': job_id.replace('job', 'application'), 'job_id': job_id, 'task_id': task_id, 'version': _API_VERSION}
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts' % substitutions, headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempt(self, job_id, task_id, attempt_id):
    """Fetch one attempt of one task."""
    # Same 'job_' prefix normalization as task_counters().
    substitutions = {'app_id': job_id.replace('job', 'application'), 'job_id': job_id.replace('application', 'job'), 'task_id': task_id, 'attempt_id': attempt_id, 'version': _API_VERSION}
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s' % substitutions, headers={'Accept': _JSON_CONTENT_TYPE})
class OozieApi(object):
  """REST client for the Oozie web services API.

  The effective user (used for doAs impersonation) is stored per-thread via
  setuser(), so one shared instance can serve concurrent requests.
  """

  def __init__(self, oozie_url, security_enabled=False):
    self._url = posixpath.join(oozie_url, API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    if security_enabled:
      self._client.set_kerberos_auth()
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    # To store user info
    self._thread_local = threading.local()

  def __str__(self):
    return "OozieApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  @property
  def user(self):
    try:
      return self._thread_local.user
    except AttributeError:
      # No user was set on this thread yet: fall back to the default user.
      return DEFAULT_USER

  def setuser(self, user):
    """Return the previous user"""
    prev = self.user
    self._thread_local.user = user
    return prev

  def _get_params(self):
    # Common request params: impersonate the current user, pin the timezone.
    if self.security_enabled:
      return {
        'doAs': self.user,
        'timezone': TIME_ZONE.get()
      }
    return {
      'user.name': DEFAULT_USER,
      'doAs': self.user,
      'timezone': TIME_ZONE.get()
    }

  def _get_oozie_properties(self, properties=None):
    # Base job properties, overridable by the caller-supplied dict.
    defaults = {
      'user.name': self.user,
    }
    if properties is not None:
      defaults.update(properties)
    return defaults

  # Filter keys accepted by get_jobs(**kwargs).
  VALID_JOB_FILTERS = ('name', 'user', 'group', 'status')

  def get_jobs(self, jobtype, offset=None, cnt=None, **kwargs):
    """
    Get a list of Oozie jobs.

    jobtype is 'wf', 'coord'
    Note that offset is 1-based.
    kwargs is used for filtering and may be one of VALID_FILTERS: name, user, group, status
    """
    params = self._get_params()
    if offset is not None:
      params['offset'] = str(offset)
    if cnt is not None:
      params['len'] = str(cnt)
    params['jobtype'] = jobtype

    filter_list = [ ]
    for key, val in kwargs.iteritems():
      if key not in OozieApi.VALID_JOB_FILTERS:
        raise ValueError('"%s" is not a valid filter for selecting jobs' % (key,))
      filter_list.append('%s=%s' % (key, val))
    params['filter'] = ';'.join(filter_list)

    # Send the request
    resp = self._root.get('jobs', params)
    if jobtype == 'wf':
      wf_list = WorkflowList(self, resp, filters=kwargs)
    elif jobtype == 'coord':
      wf_list = CoordinatorList(self, resp, filters=kwargs)
    else:
      wf_list = BundleList(self, resp, filters=kwargs)
    return wf_list

  def get_workflows(self, offset=None, cnt=None, **kwargs):
    return self.get_jobs('wf', offset, cnt, **kwargs)

  def get_coordinators(self, offset=None, cnt=None, **kwargs):
    return self.get_jobs('coord', offset, cnt, **kwargs)

  def get_bundles(self, offset=None, cnt=None, **kwargs):
    return self.get_jobs('bundle', offset, cnt, **kwargs)

  # TODO: make get_job accept any jobid
  def get_job(self, jobid):
    """
    get_job(jobid) -> Workflow
    """
    params = self._get_params()
    resp = self._root.get('job/%s' % (jobid,), params)
    wf = Workflow(self, resp)
    return wf

  def get_coordinator(self, jobid):
    params = self._get_params()
    # len=-1 asks Oozie for all coordinator actions, not just the first page.
    params.update({'len': -1})
    resp = self._root.get('job/%s' % (jobid,), params)
    return Coordinator(self, resp)

  def get_bundle(self, jobid):
    params = self._get_params()
    resp = self._root.get('job/%s' % (jobid,), params)
    return Bundle(self, resp)

  def get_job_definition(self, jobid):
    """
    get_job_definition(jobid) -> Definition (xml string)
    """
    params = self._get_params()
    params['show'] = 'definition'
    xml = self._root.get('job/%s' % (jobid,), params)
    return xml

  def get_job_log(self, jobid):
    """
    get_job_log(jobid) -> Log (xml string)
    """
    params = self._get_params()
    params['show'] = 'log'
    xml = self._root.get('job/%s' % (jobid,), params)
    return xml

  def get_action(self, action_id):
    # Dispatch on the action id shape: 'C@' marks coordinator actions and
    # 'B@' bundle actions; anything else is treated as a workflow action.
    if 'C@' in action_id:
      Klass = CoordinatorAction
    elif 'B@' in action_id:
      Klass = BundleAction
    else:
      Klass = WorkflowAction
    params = self._get_params()
    resp = self._root.get('job/%s' % (action_id,), params)
    return Klass(resp)

  def job_control(self, jobid, action, properties=None, parameters=None):
    """
    job_control(jobid, action) -> None
    Raise RestException on error.
    """
    if action not in ('start', 'suspend', 'resume', 'kill', 'rerun', 'coord-rerun', 'bundle-rerun'):
      msg = 'Invalid oozie job action: %s' % (action,)
      LOG.error(msg)
      raise ValueError(msg)
    properties = self._get_oozie_properties(properties)
    params = self._get_params()
    params['action'] = action
    if parameters is not None:
      params.update(parameters)
    return self._root.put('job/%s' % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def submit_workflow(self, application_path, properties=None):
    """
    submit_workflow(application_path, properties=None) -> jobid

    Raise RestException on error.
    """
    defaults = {
      'oozie.wf.application.path': application_path,
      'user.name': self.user,
    }
    if properties is not None:
      defaults.update(properties)
    properties = defaults
    return self.submit_job(properties)

  # Is name actually submit_coord?
  def submit_job(self, properties=None):
    """
    submit_job(properties=None, id=None) -> jobid

    Raise RestException on error.
    """
    defaults = {
      'user.name': self.user,
    }
    if properties is not None:
      defaults.update(properties)
    properties = defaults
    params = self._get_params()
    resp = self._root.post('jobs', params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)
    return resp['id']

  def rerun(self, jobid, properties=None, params=None):
    """Rerun a job; caller-supplied `params` are merged over the defaults."""
    properties = self._get_oozie_properties(properties)
    if params is None:
      params = self._get_params()
    else:
      # Bug fix: the previous code did `self._get_params().update(params)`,
      # which updated a temporary dict and discarded it, silently dropping
      # the default doAs/timezone request parameters whenever `params` was
      # supplied. Merge the caller's params over the defaults instead
      # (this also avoids mutating the caller's dict).
      merged = self._get_params()
      merged.update(params)
      params = merged
    params['action'] = 'rerun'
    return self._root.put('job/%s' % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def get_build_version(self):
    """
    get_build_version() -> Build version (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/build-version', params)
    return resp

  def get_instrumentation(self):
    params = self._get_params()
    resp = self._root.get('admin/instrumentation', params)
    return resp

  def get_configuration(self):
    """
    get_configuration() -> Oozie config (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/configuration', params)
    return resp

  def get_oozie_status(self):
    """
    get_oozie_status() -> Oozie status (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/status', params)
    return resp
class JobServerApi(object): def __init__(self, oozie_url): self._url = posixpath.join(oozie_url) self._client = HttpClient(self._url, logger=LOG) self._root = Resource(self._client) self._security_enabled = False self._thread_local = threading.local() def __str__(self): return "JobServerApi at %s" % (self._url, ) @property def url(self): return self._url @property def security_enabled(self): return self._security_enabled @property def user(self): return self._thread_local.user def setuser(self, user): if hasattr(user, 'username'): self._thread_local.user = user.username else: self._thread_local.user = user def get_status(self): return self._root.get('sessions') def create_session(self, **kwargs): return self._root.post('sessions', data=json.dumps(kwargs), contenttype='application/json') def get_session(self, uuid): return self._root.get('sessions/%s' % uuid) def submit_statement(self, uuid, statement): data = {'code': statement} return self._root.post('sessions/%s/statements' % uuid, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE) def inspect(self, uuid, statement): data = {'code': statement} return self._root.post('sessions/%s/inspect' % uuid, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE) def fetch_data(self, session, statement): return self._root.get('sessions/%s/statements/%s' % (session, statement)) def cancel(self, session): return self._root.post('sessions/%s/interrupt' % session) def get_batches(self): return self._root.get('batches') def submit_batch(self, properties): return self._root.post('batches', data=json.dumps(properties), contenttype=_JSON_CONTENT_TYPE) def get_batch(self, uuid): return self._root.get('batches/%s' % uuid) def delete_batch(self, uuid): return self._root.delete('batches/%s' % uuid)
class OozieApi(object):
  """REST client for the Oozie web services API (variant with a configurable
  API version)."""

  def __init__(self, oozie_url, security_enabled=False, api_version=API_VERSION):
    self._url = posixpath.join(oozie_url, api_version)
    self._client = HttpClient(self._url, logger=LOG)
    if security_enabled:
      self._client.set_kerberos_auth()
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    # To store username info
    self._thread_local = threading.local()
    self.api_version = api_version

  def __str__(self):
    return "OozieApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  @property
  def user(self):
    # NOTE(review): raises AttributeError if setuser() was never called on
    # this thread — unlike the sibling OozieApi which falls back to a
    # default user.
    return self._thread_local.user

  def setuser(self, user):
    # Accept either a user object (with .username) or a plain username string.
    if hasattr(user, "username"):
      self._thread_local.user = user.username
    else:
      self._thread_local.user = user

  def _get_params(self):
    # Common request params: impersonate the current user, pin the timezone.
    if self.security_enabled:
      return {"doAs": self.user, "timezone": TIME_ZONE.get()}
    return {"user.name": DEFAULT_USER, "doAs": self.user, "timezone": TIME_ZONE.get()}

  def _get_oozie_properties(self, properties=None):
    # Base job properties, overridable by the caller-supplied dict.
    defaults = {"user.name": self.user}
    if properties is not None:
      defaults.update(properties)
    return defaults

  # Filter keys accepted by get_jobs(**kwargs).
  VALID_JOB_FILTERS = ("name", "user", "group", "status")

  def get_jobs(self, jobtype, offset=None, cnt=None, **kwargs):
    """
    Get a list of Oozie jobs.

    jobtype is 'wf', 'coord'
    Note that offset is 1-based.
    kwargs is used for filtering and may be one of VALID_FILTERS: name, user, group, status
    """
    params = self._get_params()
    if offset is not None:
      params["offset"] = str(offset)
    if cnt is not None:
      params["len"] = str(cnt)
    params["jobtype"] = jobtype

    filter_list = []
    for key, val in kwargs.iteritems():
      if key not in OozieApi.VALID_JOB_FILTERS:
        raise ValueError('"%s" is not a valid filter for selecting jobs' % (key,))
      filter_list.append("%s=%s" % (key, val))
    params["filter"] = ";".join(filter_list)

    # Send the request
    resp = self._root.get("jobs", params)
    if jobtype == "wf":
      wf_list = WorkflowList(self, resp, filters=kwargs)
    elif jobtype == "coord":
      wf_list = CoordinatorList(self, resp, filters=kwargs)
    else:
      wf_list = BundleList(self, resp, filters=kwargs)
    return wf_list

  def get_workflows(self, offset=None, cnt=None, **kwargs):
    return self.get_jobs("wf", offset, cnt, **kwargs)

  def get_coordinators(self, offset=None, cnt=None, **kwargs):
    return self.get_jobs("coord", offset, cnt, **kwargs)

  def get_bundles(self, offset=None, cnt=None, **kwargs):
    return self.get_jobs("bundle", offset, cnt, **kwargs)

  # TODO: make get_job accept any jobid
  def get_job(self, jobid):
    """
    get_job(jobid) -> Workflow
    """
    params = self._get_params()
    resp = self._root.get("job/%s" % (jobid,), params)
    wf = Workflow(self, resp)
    return wf

  def get_coordinator(self, jobid):
    params = self._get_params()
    # len=-1 asks Oozie for all coordinator actions, not just the first page.
    params.update({"len": -1})
    resp = self._root.get("job/%s" % (jobid,), params)
    return Coordinator(self, resp)

  def get_bundle(self, jobid):
    params = self._get_params()
    resp = self._root.get("job/%s" % (jobid,), params)
    return Bundle(self, resp)

  def get_job_definition(self, jobid):
    """
    get_job_definition(jobid) -> Definition (xml string)
    """
    params = self._get_params()
    params["show"] = "definition"
    xml = self._root.get("job/%s" % (jobid,), params)
    return xml

  def get_job_log(self, jobid):
    """
    get_job_log(jobid) -> Log (xml string)
    """
    params = self._get_params()
    params["show"] = "log"
    xml = self._root.get("job/%s" % (jobid,), params)
    return xml

  def get_action(self, action_id):
    # Dispatch on the action id shape: 'C@' marks coordinator actions and
    # 'B@' bundle actions; anything else is treated as a workflow action.
    if "C@" in action_id:
      Klass = CoordinatorAction
    elif "B@" in action_id:
      Klass = BundleAction
    else:
      Klass = WorkflowAction
    params = self._get_params()
    resp = self._root.get("job/%s" % (action_id,), params)
    return Klass(resp)

  def job_control(self, jobid, action, properties=None, parameters=None):
    """
    job_control(jobid, action) -> None
    Raise RestException on error.
    """
    if action not in ("start", "suspend", "resume", "kill", "rerun", "coord-rerun", "bundle-rerun"):
      msg = "Invalid oozie job action: %s" % (action,)
      LOG.error(msg)
      raise ValueError(msg)
    properties = self._get_oozie_properties(properties)
    params = self._get_params()
    params["action"] = action
    if parameters is not None:
      params.update(parameters)
    return self._root.put("job/%s" % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def submit_workflow(self, application_path, properties=None):
    """
    submit_workflow(application_path, properties=None) -> jobid

    Raise RestException on error.
    """
    defaults = {"oozie.wf.application.path": application_path, "user.name": self.user}
    if properties is not None:
      defaults.update(properties)
    properties = defaults
    return self.submit_job(properties)

  # Is name actually submit_coord?
  def submit_job(self, properties=None):
    """
    submit_job(properties=None, id=None) -> jobid

    Raise RestException on error.
    """
    defaults = {"user.name": self.user}
    if properties is not None:
      defaults.update(properties)
    properties = defaults
    params = self._get_params()
    resp = self._root.post("jobs", params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)
    return resp["id"]

  def rerun(self, jobid, properties=None, params=None):
    properties = self._get_oozie_properties(properties)
    if params is None:
      params = self._get_params()
    else:
      # NOTE(review): this updates a temporary dict and discards it, so the
      # default doAs/timezone params are dropped whenever `params` is given —
      # looks like a bug; confirm the intent before relying on it.
      self._get_params().update(params)
    params["action"] = "rerun"
    return self._root.put("job/%s" % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def get_build_version(self):
    """
    get_build_version() -> Build version (dictionary)
    """
    params = self._get_params()
    resp = self._root.get("admin/build-version", params)
    return resp

  def get_instrumentation(self):
    params = self._get_params()
    resp = self._root.get("admin/instrumentation", params)
    return resp

  def get_configuration(self):
    """
    get_configuration() -> Oozie config (dictionary)
    """
    params = self._get_params()
    resp = self._root.get("admin/configuration", params)
    return resp

  def get_oozie_status(self):
    """
    get_oozie_status() -> Oozie status (dictionary)
    """
    params = self._get_params()
    resp = self._root.get("admin/status", params)
    return resp

  def get_oozie_slas(self, **kwargs):
    """
    Query the SLA summary list. Example filter keys:
      app_name=my-sla-app
      id=0000002-131206135002457-oozie-oozi-W
      nominal_start=2013-06-18T00:01Z
      nominal_end=2013-06-23T00:01Z
    """
    params = self._get_params()
    params["filter"] = ";".join(["%s=%s" % (key, val) for key, val in kwargs.iteritems()])
    resp = self._root.get("sla", params)
    return resp["slaSummaryList"]
class JobServerApi(object):
  """REST client for the Livy server: sessions, statements and batches."""

  def __init__(self, livy_url):
    self._url = posixpath.join(livy_url)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = SECURITY_ENABLED.get()
    self._csrf_enabled = CSRF_ENABLED.get()
    # Effective user is tracked per-thread (see setuser()).
    self._thread_local = threading.local()

    if self.security_enabled:
      self._client.set_kerberos_auth()

    if self.csrf_enabled:
      # CSRF protection requires the X-Requested-By header on every request.
      self._client.set_headers({'X-Requested-By' : 'hue'})

    self._client.set_verify(SSL_CERT_CA_VERIFY.get())

  def __str__(self):
    return "JobServerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  @property
  def csrf_enabled(self):
    return self._csrf_enabled

  @property
  def user(self):
    # NOTE(review): raises AttributeError if setuser() has not been called
    # on this thread yet.
    return self._thread_local.user

  def setuser(self, user):
    # Accept either a user object (with .username) or a plain username string.
    if hasattr(user, 'username'):
      self._thread_local.user = user.username
    else:
      self._thread_local.user = user

  def get_status(self):
    return self._root.get('sessions')

  def get_log(self, uuid, startFrom=None, size=None):
    """Return the session log as one newline-joined string."""
    params = {}

    if startFrom is not None:
      params['from'] = startFrom

    if size is not None:
      params['size'] = size

    response = self._root.get('sessions/%s/log' % uuid, params=params)

    return '\n'.join(response['log'])

  def create_session(self, **properties):
    # Sessions are created on behalf of the current user (impersonation).
    properties['proxyUser'] = self.user
    return self._root.post('sessions', data=json.dumps(properties), contenttype=_JSON_CONTENT_TYPE)

  def get_sessions(self):
    return self._root.get('sessions')

  def get_session(self, uuid):
    return self._root.get('sessions/%s' % uuid)

  def get_statements(self, uuid):
    return self._root.get('sessions/%s/statements' % uuid)

  def submit_statement(self, uuid, statement):
    """Submit a code statement to the given session."""
    data = {'code': statement}
    return self._root.post('sessions/%s/statements' % uuid, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE)

  def inspect(self, uuid, statement):
    data = {'code': statement}
    return self._root.post('sessions/%s/inspect' % uuid, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE)

  def fetch_data(self, session, statement):
    return self._root.get('sessions/%s/statements/%s' % (session, statement))

  def cancel(self, session):
    return self._root.post('sessions/%s/interrupt' % session)

  def close(self, uuid):
    return self._root.delete('sessions/%s' % uuid)

  def get_batches(self):
    return self._root.get('batches')

  def submit_batch(self, properties):
    # Batches are also submitted on behalf of the current user.
    properties['proxyUser'] = self.user
    return self._root.post('batches', data=json.dumps(properties), contenttype=_JSON_CONTENT_TYPE)

  def get_batch(self, uuid):
    return self._root.get('batches/%s' % uuid)

  def get_batch_status(self, uuid):
    response = self._root.get('batches/%s/state' % uuid)
    return response['state']

  def get_batch_log(self, uuid, startFrom=None, size=None):
    """Return the batch log as one newline-joined string."""
    params = {}

    if startFrom is not None:
      params['from'] = startFrom

    if size is not None:
      params['size'] = size

    response = self._root.get('batches/%s/log' % uuid, params=params)

    return '\n'.join(response['log'])

  def close_batch(self, uuid):
    return self._root.delete('batches/%s' % uuid)
class HistoryServerApi(object):
  """REST client for the MapReduce Job History Server web services."""

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    # NOTE(review): the parameter is named oozie_url but is presumably the
    # history server base URL — confirm against callers.
    self._url = posixpath.join(oozie_url, 'ws/%s/history' % _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "HistoryServerApi at %s" % (self._url, )

  @property
  def url(self):
    return self._url

  def job(self, user, job_id):
    # `user` is unused; kept for signature parity with sibling API classes.
    return self._root.get('mapreduce/jobs/%(job_id)s' % {'job_id': job_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def counters(self, job_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/counters' % {'job_id': job_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def conf(self, job_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/conf' % {'job_id': job_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def job_attempts(self, job_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/jobattempts' % {'job_id': job_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def tasks(self, job_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/tasks' % {'job_id': job_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def task(self, job_id, task_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s' % {'job_id': job_id, 'task_id': task_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempts(self, job_id, task_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts' % {'job_id': job_id, 'task_id': task_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def task_counters(self, job_id, task_id):
    # Normalize an 'application_' prefixed id back to the 'job_' form for
    # this endpoint (only this method does so — preserved as-is).
    job_id = job_id.replace('application', 'job')
    return self._root.get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/counters' % {'job_id': job_id, 'task_id': task_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempt(self, job_id, task_id, attempt_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s' % {'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempt_counters(self, job_id, task_id, attempt_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s/counters' % {'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id}, headers={'Accept': _JSON_CONTENT_TYPE})
class SparkHistoryServerApi(object):
  """REST client for the Spark History Server API, plus helpers to fetch
  executor/driver logs through the log-link pages it references."""

  def __init__(self, spark_hs_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._ui_url = spark_hs_url
    self._url = posixpath.join(spark_hs_url, 'api/%s/' % _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "Spark History Server API at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def ui_url(self):
    return self._ui_url

  @property
  def headers(self):
    return {'Accept': _JSON_CONTENT_TYPE}

  def applications(self):
    return self._root.get('applications', headers=self.headers)

  def application(self, app_id):
    return self._root.get('applications/%(app_id)s' % {'app_id': app_id}, headers=self.headers)

  def jobs(self, app_id):
    return self._root.get('applications/%(app_id)s/jobs' % {'app_id': app_id}, headers=self.headers)

  def stages(self, app_id):
    return self._root.get('applications/%(app_id)s/stages' % {'app_id': app_id}, headers=self.headers)

  def executors(self, job):
    LOG.debug("Getting executors for Spark job %s" % job.jobId)
    # Resolve the [app-id] or [app-id]/[attempt-id] path segment first.
    app_id = self.get_real_app_id(job)
    if not app_id:
      return []
    return self._root.get('applications/%(app_id)s/executors' % {'app_id': app_id}, headers=self.headers)

  def stage_attempts(self, app_id, stage_id):
    return self._root.get('applications/%(app_id)s/stages/%(stage_id)s' % {'app_id': app_id, 'stage_id': stage_id}, headers=self.headers)

  def stage_attempt(self, app_id, stage_id, stage_attempt_id):
    return self._root.get('applications/%(app_id)s/stages/%(stage_id)s/%(stage_attempt_id)s' % {'app_id': app_id, 'stage_id': stage_id, 'stage_attempt_id': stage_attempt_id}, headers=self.headers)

  def task_summary(self, app_id, stage_id, stage_attempt_id):
    return self._root.get('applications/%(app_id)s/stages/%(stage_id)s/%(stage_attempt_id)s/taskSummary' % {'app_id': app_id, 'stage_id': stage_id, 'stage_attempt_id': stage_attempt_id}, headers=self.headers)

  def task_list(self, app_id, stage_id, stage_attempt_id):
    return self._root.get('applications/%(app_id)s/stages/%(stage_id)s/%(stage_attempt_id)s/taskList' % {'app_id': app_id, 'stage_id': stage_id, 'stage_attempt_id': stage_attempt_id}, headers=self.headers)

  def storages(self, app_id):
    return self._root.get('applications/%(app_id)s/storage/rdd' % {'app_id': app_id}, headers=self.headers)

  def storage(self, app_id, rdd_id):
    return self._root.get('applications/%(app_id)s/storage/rdd/%(rdd_id)s' % {'app_id': app_id, 'rdd_id': rdd_id}, headers=self.headers)

  def download_logs(self, app_id):
    return self._root.get('applications/%(app_id)s/logs' % {'app_id': app_id}, headers=self.headers)

  def download_attempt_logs(self, app_id, attempt_id):
    return self._root.get('applications/%(app_id)s/%(attempt_id)s/logs' % {'app_id': app_id, 'attempt_id': attempt_id}, headers=self.headers)

  def download_executors_logs(self, request, job, name, offset):
    # Driver logs: look up the driver executor's log links, then stream.
    log_links = self.get_executors_loglinks(job)

    return self.retrieve_log_content(log_links, name, request.user.username, offset)

  def download_executor_logs(self, user, executor, name, offset):
    return self.retrieve_log_content(executor['logs'], name, user.username, offset)

  def retrieve_log_content(self, log_links, log_name, username, offset):
    """Fetch 'stdout' or 'stderr' text from a log-link page.

    The page is HTML; the actual log text is scraped out of the second
    table cell. NOTE(review): this XPath is fragile — it depends on the
    exact layout of the serving web UI.
    """
    params = {
      'doAs': username
    }

    if offset != 0:
      params['start'] = offset

    # Anything that is not exactly 'stderr' falls back to 'stdout'.
    if not log_name or not log_name == 'stderr':
      log_name = 'stdout'

    log = ''
    if log_links and log_name in log_links:
      log_link = log_links[log_name]
      root = Resource(get_log_client(log_link), lib_urlsplit(log_link)[2], urlencode=False)
      response = root.get('', params=params)
      log = html.fromstring(response, parser=html.HTMLParser()).xpath('/html/body/table/tbody/tr/td[2]')[0].text_content()

    return log

  def get_executors_loglinks(self, job):
    """Return the log-links mapping of the driver (or first) executor, or None."""
    executor = None
    if job.metrics and 'executors' in job.metrics and job.metrics['executors']:
      executors = [executor for executor in job.metrics['executors'] if executor[0] == 'driver'] # look up driver executor
      if not executors:
        executor = job.metrics['executors'][0]
      else:
        executor = executors[0]

    # NOTE(review): executor rows are accessed positionally; index 12 is
    # presumed to hold the log-links dict — confirm against the producer of
    # job.metrics.
    return None if not executor else executor[12]

  def get_real_app_id(self, job):
    # https://spark.apache.org/docs/1.6.0/monitoring.html and https://spark.apache.org/docs/2.0.0/monitoring.html
    # When running on Yarn, each application has multiple attempts, so [app-id] is actually [app-id]/[attempt-id] in all cases.
    # When running job as cluster mode, an attempt number is part of application ID, but proxy URL can't be resolved to match
    # Spark history URL. In the applications list, each job's attampt list shows if attempt ID is used and how many attempts.
    try:
      jobs_json = self.applications()
      job_filtered_json = [x for x in jobs_json if x['id'] == job.jobId]

      if not job_filtered_json:
        # Unknown application id: return a falsy value (empty dict).
        return {}

      attempts = job_filtered_json[0]['attempts']

      if len(attempts) == 1:
        app_id = job.jobId if 'attemptId' not in attempts[0] else job.jobId + '/' + attempts[0]['attemptId']
      else:
        app_id = job.jobId + '/%d' % len(attempts)

      LOG.debug("Getting real spark app id %s for Spark job %s" % (app_id, job.jobId))
    except Exception as e:
      LOG.error('Cannot get real app id %s: %s' % (job.jobId, e))
      app_id = None

    return app_id
class JobServerApi(object):
  """Client for the job server REST endpoints (sessions and batches)."""

  def __init__(self, oozie_url):
    self._url = posixpath.join(oozie_url)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = False
    self._thread_local = threading.local()

  def __str__(self):
    return "JobServerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  @property
  def user(self):
    return self._thread_local.user

  def setuser(self, user):
    # A user object exposes .username; a bare string is stored as-is.
    self._thread_local.user = getattr(user, "username", user)

  @staticmethod
  def _paging(startFrom, size):
    # Build the paging params shared by the two log endpoints.
    window = {}
    if startFrom is not None:
      window["from"] = startFrom
    if size is not None:
      window["size"] = size
    return window

  def get_status(self):
    return self._root.get("sessions")

  def get_log(self, uuid, startFrom=None, size=None):
    """Return the session log as a single newline-joined string."""
    response = self._root.get("sessions/%s/log" % uuid, params=self._paging(startFrom, size))
    return "\n".join(response["log"])

  def create_session(self, **properties):
    # Sessions are created on behalf of the current user.
    properties["proxyUser"] = self.user
    return self._root.post("sessions", data=json.dumps(properties), contenttype="application/json")

  def get_session(self, uuid):
    return self._root.get("sessions/%s" % uuid)

  def submit_statement(self, uuid, statement):
    """Submit a code statement to the given session."""
    payload = {"code": statement}
    return self._root.post("sessions/%s/statements" % uuid, data=json.dumps(payload), contenttype=_JSON_CONTENT_TYPE)

  def inspect(self, uuid, statement):
    payload = {"code": statement}
    return self._root.post("sessions/%s/inspect" % uuid, data=json.dumps(payload), contenttype=_JSON_CONTENT_TYPE)

  def fetch_data(self, session, statement):
    return self._root.get("sessions/%s/statements/%s" % (session, statement))

  def cancel(self, session):
    return self._root.post("sessions/%s/interrupt" % session)

  def close(self, uuid):
    return self._root.delete("sessions/%s" % uuid)

  def get_batches(self):
    return self._root.get("batches")

  def submit_batch(self, properties):
    # Batches are also submitted on behalf of the current user.
    properties["proxyUser"] = self.user
    return self._root.post("batches", data=json.dumps(properties), contenttype=_JSON_CONTENT_TYPE)

  def get_batch(self, uuid):
    return self._root.get("batches/%s" % uuid)

  def get_batch_status(self, uuid):
    return self._root.get("batches/%s/state" % uuid)["state"]

  def get_batch_log(self, uuid, startFrom=None, size=None):
    """Return the batch log as a single newline-joined string."""
    response = self._root.get("batches/%s/log" % uuid, params=self._paging(startFrom, size))
    return "\n".join(response["log"])

  def close_batch(self, uuid):
    return self._root.delete("batches/%s" % uuid)
class ImpalaDaemonApi(object):
  """REST client for an Impala daemon's debug web server (query endpoints)."""

  def __init__(self, server_url):
    self._url = server_url
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = is_kerberos_enabled()
    self._webserver_spnego_enabled = is_webserver_spnego_enabled()
    self._thread_local = threading.local()

    # You can set username/password for the Impala Web UI which overrides kerberos
    if DAEMON_API_USERNAME.get() is not None and DAEMON_API_PASSWORD.get() is not None:
      if DAEMON_API_AUTH_SCHEME.get().lower() == 'basic':
        self._client.set_basic_auth(DAEMON_API_USERNAME.get(), DAEMON_API_PASSWORD.get())
        LOG.info("Using username and password for basic authentication")
      else:
        self._client.set_digest_auth(DAEMON_API_USERNAME.get(), DAEMON_API_PASSWORD.get())
        LOG.info('Using username and password for digest authentication')
    elif self._webserver_spnego_enabled or self._security_enabled:
      self._client.set_kerberos_auth()
      LOG.info('Using kerberos principal for authentication')

  def __str__(self):
    return "ImpalaDaemonApi at %s" % self._url

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  @property
  def user(self):
    return self._thread_local.user

  def set_user(self, user):
    # Accept either a User-like object with a `username` attribute or a plain string.
    if hasattr(user, 'username'):
      self._thread_local.user = user.username
    else:
      self._thread_local.user = user

  def _decode_json(self, resp, label=None):
    """Return `resp` parsed as JSON when it is a string, unchanged otherwise.

    The daemon sometimes answers with a JSON-encoded string body; non-string
    responses are assumed to be already-decoded structures and passed through.
    Raises ImpalaDaemonApiException on unparseable payloads; `label` names
    the endpoint in the error message (omitted for the plain message used by
    get_queries/get_query).
    """
    try:
      string_types = basestring  # Python 2
    except NameError:
      string_types = str  # Python 3
    if not isinstance(resp, string_types):
      return resp
    try:
      return json.loads(resp)
    except ValueError as e:
      prefix = 'ImpalaDaemonApi %s' % label if label else 'ImpalaDaemonApi'
      raise ImpalaDaemonApiException('%s did not return valid JSON: %s' % (prefix, e))

  def get_queries(self):
    params = {'json': 'true'}
    return self._decode_json(self._root.get('queries', params=params))

  def get_query(self, query_id):
    params = {'query_id': query_id, 'json': 'true'}
    return self._decode_json(self._root.get('query_plan', params=params))

  def get_query_profile(self, query_id):
    params = {'query_id': query_id, 'json': 'true'}
    return self._decode_json(self._root.get('query_profile', params=params), 'query_profile')

  def get_query_memory(self, query_id):
    params = {'query_id': query_id, 'json': 'true'}
    return self._decode_json(self._root.get('query_memory', params=params), 'query_memory')

  def kill(self, query_id):
    params = {'query_id': query_id, 'json': 'true'}
    return self._decode_json(self._root.get('cancel_query', params=params), 'kill')

  def get_query_backends(self, query_id):
    params = {'query_id': query_id, 'json': 'true'}
    return self._decode_json(self._root.get('query_backends', params=params), 'query_backends')

  def get_query_finstances(self, query_id):
    params = {'query_id': query_id, 'json': 'true'}
    return self._decode_json(self._root.get('query_finstances', params=params), 'query_finstances')

  def get_query_summary(self, query_id):
    params = {'query_id': query_id, 'json': 'true'}
    return self._decode_json(self._root.get('query_summary', params=params), 'query_summary')

  def get_query_profile_encoded(self, query_id):
    # Raw (thrift-encoded) profile: returned as-is, never JSON-decoded.
    params = {'query_id': query_id}
    return self._root.get('query_profile_encoded', params=params)
class JobServerApi(object):
  """REST client for a Spark Jobserver instance (jobs, contexts, jars)."""

  def __init__(self, oozie_url):
    self._url = posixpath.join(oozie_url)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = False
    # To store user info
    self._thread_local = threading.local()

  def __str__(self):
    return "JobServerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  @property
  def user(self):
    return self._thread_local.user

  def setuser(self, user):
    # Accept either a User-like object with a `username` attribute or a plain string.
    if hasattr(user, 'username'):
      self._thread_local.user = user.username
    else:
      self._thread_local.user = user

  def get_status(self, **kwargs):
    return self._root.get('healthz', params=kwargs, headers={'Accept': _TEXT_CONTENT_TYPE})

  def submit_job(self, appName, classPath, data, context=None, sync=False):
    """POST a job; `context` is optional, `sync` waits for the result."""
    params = {'appName': appName, 'classPath': classPath, 'sync': sync}
    if context:
      params['context'] = context
    # BUG FIX: the path was previously written as `'jobs' % params` — a
    # spurious %-format against a dict with no placeholders. The query
    # parameters already travel via `params=`.
    return self._root.post('jobs', params=params, data=data, contenttype=_BINARY_CONTENT_TYPE)

  def job(self, job_id):
    return self._root.get('jobs/%s' % job_id, headers={'Accept': _JSON_CONTENT_TYPE})

  def jobs(self, **kwargs):
    return self._root.get('jobs', params=kwargs, headers={'Accept': _JSON_CONTENT_TYPE})

  def create_context(self, name, **kwargs):
    return self._root.post('contexts/%s' % name, params=kwargs, contenttype=_BINARY_CONTENT_TYPE)

  def contexts(self, **kwargs):
    return self._root.get('contexts', params=kwargs, headers={'Accept': _JSON_CONTENT_TYPE})

  def delete_context(self, name, **kwargs):
    # kwargs accepted for interface compatibility but not forwarded.
    return self._root.delete('contexts/%s' % name)

  def upload_jar(self, app_name, data):
    return self._root.post('jars/%s' % app_name, data=data, contenttype=_BINARY_CONTENT_TYPE)

  def jars(self, **kwargs):
    return self._root.get('jars', params=kwargs, headers={'Accept': _JSON_CONTENT_TYPE})
log_link = attempt['logsLink'] except (KeyError, RestException), e: raise KeyError( _("Cannot find job attempt '%(id)s'.") % {'id': job.jobId}, e) link = '/%s/' % name params = {} if offset and int(offset) >= 0: params['start'] = offset root = Resource(get_log_client(log_link), urlparse.urlsplit(log_link)[2], urlencode=False) try: response = root.get(link, params=params) log = html.fromstring(response).xpath( '/html/body/table/tbody/tr/td[2]')[0].text_content() except Exception, e: log = _('Failed to retrieve log: %s') % e response = {'log': log} return HttpResponse(json.dumps(response), mimetype="application/json") @check_job_permission def job_single_logs(request, job): """ Try to smartly detect the most useful task attempt (e.g. Oozie launcher, failed task) and get its MR logs. """
class OozieApi(object):
  """REST client for the Oozie server (workflows, coordinators, bundles, admin)."""

  def __init__(self, oozie_url, user, security_enabled=False, api_version=API_VERSION, ssl_cert_ca_verify=True):
    self._url = posixpath.join(oozie_url, api_version)
    self._client = HttpClient(self._url, logger=LOG)
    if security_enabled:
      self._client.set_kerberos_auth()
    self._client.set_verify(ssl_cert_ca_verify)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    # To store username info
    if hasattr(user, 'username'):
      self.user = user.username
    else:
      self.user = user
    self.api_version = api_version

  def __str__(self):
    return "OozieApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def _get_params(self):
    """Common request params: impersonation (doAs) and timezone."""
    if self.security_enabled:
      return {'doAs': self.user, 'timezone': TIME_ZONE.get()}
    return {'user.name': DEFAULT_USER, 'doAs': self.user, 'timezone': TIME_ZONE.get()}

  def _get_oozie_properties(self, properties=None):
    """Merge caller properties over the default user.name property."""
    defaults = {
      'user.name': self.user,
    }
    if properties is not None:
      defaults.update(properties)
    return defaults

  VALID_JOB_FILTERS = ('name', 'user', 'group', 'status', 'startcreatedtime')
  VALID_LOG_FILTERS = {'recent', 'limit', 'loglevel', 'text'}

  def get_jobs(self, jobtype, offset=None, cnt=None, filters=None):
    """
    Get a list of Oozie jobs.

    Note that offset is 1-based. `filters` is an iterable of (key, value)
    pairs and each key must be one of VALID_JOB_FILTERS.
    """
    params = self._get_params()
    if offset is not None:
      params['offset'] = str(offset)
    if cnt is not None:
      params['len'] = str(cnt)
    if filters is None:
      filters = []
    params['jobtype'] = jobtype

    filter_list = []
    for key, val in filters:
      if key not in OozieApi.VALID_JOB_FILTERS:
        raise ValueError('"%s" is not a valid filter for selecting jobs' % (key,))
      filter_list.append('%s=%s' % (key, val))
    params['filter'] = ';'.join(filter_list)

    # Send the request
    resp = self._root.get('jobs', params)
    if jobtype == 'wf':
      wf_list = WorkflowList(self, resp, filters=filters)
    elif jobtype == 'coord':
      wf_list = CoordinatorList(self, resp, filters=filters)
    else:
      wf_list = BundleList(self, resp, filters=filters)
    return wf_list

  def get_workflows(self, offset=None, cnt=None, filters=None):
    return self.get_jobs('wf', offset, cnt, filters)

  def get_coordinators(self, offset=None, cnt=None, filters=None):
    return self.get_jobs('coord', offset, cnt, filters)

  def get_bundles(self, offset=None, cnt=None, filters=None):
    return self.get_jobs('bundle', offset, cnt, filters)

  # TODO: make get_job accept any jobid
  def get_job(self, jobid):
    """
    get_job(jobid) -> Workflow
    """
    params = self._get_params()
    resp = self._root.get('job/%s' % (jobid,), params)
    wf = Workflow(self, resp)
    return wf

  def get_coordinator(self, jobid, offset=None, cnt=None, filters=None):
    params = self._get_params()
    if offset is not None:
      params['offset'] = str(offset)
    if cnt is not None:
      params['len'] = str(cnt)
    if filters is None:
      filters = {}
    params.update({'order': 'desc'})

    filter_list = []
    for key, val in filters:
      if key not in OozieApi.VALID_JOB_FILTERS:
        raise ValueError('"%s" is not a valid filter for selecting jobs' % (key,))
      filter_list.append('%s=%s' % (key, val))
    params['filter'] = ';'.join(filter_list)

    resp = self._root.get('job/%s' % (jobid,), params)
    return Coordinator(self, resp)

  def get_bundle(self, jobid):
    params = self._get_params()
    resp = self._root.get('job/%s' % (jobid,), params)
    return Bundle(self, resp)

  def get_job_definition(self, jobid):
    """
    get_job_definition(jobid) -> Definition (xml string)
    """
    params = self._get_params()
    params['show'] = 'definition'
    return self._root.get('job/%s' % (jobid,), params)

  def get_job_log(self, jobid, logfilter=None):
    """
    get_job_log(jobid) -> Log (xml string)
    """
    params = self._get_params()
    params['show'] = 'log'

    filter_list = []
    if logfilter is None:
      logfilter = []
    for key, val in logfilter:
      if key not in OozieApi.VALID_LOG_FILTERS:
        raise ValueError('"%s" is not a valid filter for job logs' % (key,))
      filter_list.append('%s=%s' % (key, val))
    params['logfilter'] = ';'.join(filter_list)
    return self._root.get('job/%s' % (jobid,), params)

  def get_job_status(self, jobid):
    params = self._get_params()
    params['show'] = 'status'
    xml = self._root.get('job/%s' % (jobid,), params)
    return xml

  def get_action(self, action_id):
    # The action id format encodes the job type: C@ = coordinator, B@ = bundle.
    if 'C@' in action_id:
      Klass = CoordinatorAction
    elif 'B@' in action_id:
      Klass = BundleAction
    else:
      Klass = WorkflowAction
    params = self._get_params()
    resp = self._root.get('job/%s' % (action_id,), params)
    return Klass(resp)

  def job_control(self, jobid, action, properties=None, parameters=None):
    """
    job_control(jobid, action) -> None
    Raise RestException on error.
    """
    if action not in ('start', 'suspend', 'resume', 'kill', 'rerun', 'coord-rerun', 'bundle-rerun', 'change', 'ignore'):
      msg = 'Invalid oozie job action: %s' % (action,)
      LOG.error(msg)
      raise ValueError(msg)
    properties = self._get_oozie_properties(properties)
    params = self._get_params()
    params['action'] = action
    if parameters is not None:
      params.update(parameters)
    return self._root.put('job/%s' % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def submit_workflow(self, application_path, properties=None):
    """
    submit_workflow(application_path, properties=None) -> jobid
    Raise RestException on error.
    """
    defaults = {
      'oozie.wf.application.path': application_path,
      'user.name': self.user,
    }
    if properties is not None:
      defaults.update(properties)
    properties = defaults
    return self.submit_job(properties)

  # Is name actually submit_coord?
  def submit_job(self, properties=None):
    """
    submit_job(properties=None, id=None) -> jobid
    Raise RestException on error.
    """
    defaults = {
      'user.name': self.user,
    }
    if properties is not None:
      defaults.update(properties)
    properties = defaults
    params = self._get_params()
    resp = self._root.post('jobs', params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)
    return resp['id']

  def dryrun(self, properties=None):
    defaults = {
      'user.name': self.user,
    }
    if properties is not None:
      defaults.update(properties)
    properties = defaults
    params = self._get_params()
    params['action'] = 'dryrun'
    return self._root.post('jobs', params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def rerun(self, jobid, properties=None, params=None):
    properties = self._get_oozie_properties(properties)
    # BUG FIX: the caller-supplied params must be merged over the defaults.
    # Previously `self._get_params().update(params)` built the defaults,
    # merged into them, and discarded the result — a silent no-op.
    if params is None:
      params = self._get_params()
    else:
      defaults = self._get_params()
      defaults.update(params)
      params = defaults
    params['action'] = 'rerun'
    return self._root.put('job/%s' % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def get_build_version(self):
    """
    get_build_version() -> Build version (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/build-version', params)
    return resp

  def get_instrumentation(self):
    params = self._get_params()
    resp = self._root.get('admin/instrumentation', params)
    return resp

  def get_metrics(self):
    params = self._get_params()
    resp = self._root.get('admin/metrics', params)
    return resp

  def get_configuration(self):
    """
    get_configuration() -> Oozie config (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/configuration', params)
    return resp

  def get_oozie_status(self):
    """
    get_oozie_status() -> Oozie status (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/status', params)
    return resp

  def get_oozie_slas(self, **kwargs):
    """
    filter=
      app_name=my-sla-app
      id=0000002-131206135002457-oozie-oozi-W
      nominal_start=2013-06-18T00:01Z
      nominal_end=2013-06-23T00:01Z
    """
    params = self._get_params()
    params['filter'] = ';'.join(['%s=%s' % (key, val) for key, val in kwargs.iteritems()])
    resp = self._root.get('sla', params)
    return resp['slaSummaryList']
def get_task_log(self, offset=0): logs = [] attempt = self.task.job.job_attempts['jobAttempt'][-1] log_link = attempt['logsLink'] # Generate actual task log link from logsLink url if self.task.job.status in ('NEW', 'SUBMITTED', 'RUNNING' ) or self.type == 'Oozie Launcher': logs_path = '/node/containerlogs/' node_url, tracking_path = log_link.split(logs_path) container_id, user = tracking_path.strip('/').split('/') # Replace log path tokens with actual container properties if available if hasattr(self, 'nodeHttpAddress') and 'nodeId' in attempt: node_url = '%s://%s' % (node_url.split('://')[0], self.nodeHttpAddress) container_id = self.assignedContainerId if hasattr( self, 'assignedContainerId') else container_id log_link = '%(node_url)s/%(logs_path)s/%(container)s/%(user)s' % { 'node_url': node_url, 'logs_path': logs_path.strip('/'), 'container': container_id, 'user': user } else: # Completed jobs logs_path = '/jobhistory/logs/' root_url, tracking_path = log_link.split(logs_path) node_url, container_id, attempt_id, user = tracking_path.strip( '/').split('/') # Replace log path tokens with actual attempt properties if available if hasattr(self, 'nodeHttpAddress') and 'nodeId' in attempt: node_url = '%s:%s' % (self.nodeHttpAddress.split(':')[0], attempt['nodeId'].split(':')[1]) container_id = self.assignedContainerId if hasattr( self, 'assignedContainerId') else container_id attempt_id = self.attemptId if hasattr(self, 'attemptId') else attempt_id log_link = '%(root_url)s/%(logs_path)s/%(node)s/%(container)s/%(attempt)s/%(user)s' % { 'root_url': root_url, 'logs_path': logs_path.strip('/'), 'node': node_url, 'container': container_id, 'attempt': attempt_id, 'user': user } for name in ('stdout', 'stderr', 'syslog'): link = '/%s/' % name if self.type == 'Oozie Launcher' and not self.task.job.status == 'FINISHED': # Yarn currently dumps with 500 error with doas in running state params = {} else: params = {'doAs': user} if int(offset) != 0: params['start'] = offset else: 
params['start'] = 0 response = None try: log_link = re.sub('job_[^/]+', self.id, log_link) root = Resource(get_log_client(log_link), urlparse.urlsplit(log_link)[2], urlencode=False) response = root.get(link, params=params) log = html.fromstring( response, parser=html.HTMLParser()).xpath( '/html/body/table/tbody/tr/td[2]')[0].text_content() except Exception, e: log = _('Failed to retrieve log: %s' % e) try: debug_info = '\nLog Link: %s' % log_link if response: debug_info += '\nHTML Response: %s' % response LOG.error(debug_info) except: LOG.exception('failed to build debug info') logs.append(log)
class JobServerApi(object):
  """Thin HTTP wrapper over a Livy server's sessions and batches endpoints."""

  def __init__(self, oozie_url):
    self._url = posixpath.join(oozie_url)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = False
    self._thread_local = threading.local()

  def __str__(self):
    return 'JobServerApi at %s' % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  @property
  def user(self):
    return self._thread_local.user

  def setuser(self, user):
    # A model object carrying a `username` attribute or a bare string both work.
    self._thread_local.user = getattr(user, 'username', user)

  @staticmethod
  def _window_params(startFrom, size):
    # Build the optional log-window query parameters, skipping unset ones.
    window = {}
    if startFrom is not None:
      window['from'] = startFrom
    if size is not None:
      window['size'] = size
    return window

  def get_status(self):
    return self._root.get('sessions')

  def get_log(self, uuid, startFrom=None, size=None):
    """Return the session log as a single newline-joined string."""
    response = self._root.get('sessions/%s/log' % uuid, params=self._window_params(startFrom, size))
    return '\n'.join(response['log'])

  def create_session(self, **properties):
    properties['proxyUser'] = self.user
    return self._root.post('sessions', data=json.dumps(properties), contenttype=_JSON_CONTENT_TYPE)

  def get_session(self, uuid):
    return self._root.get('sessions/%s' % uuid)

  def submit_statement(self, uuid, statement):
    return self._root.post('sessions/%s/statements' % uuid, data=json.dumps({'code': statement}), contenttype=_JSON_CONTENT_TYPE)

  def inspect(self, uuid, statement):
    return self._root.post('sessions/%s/inspect' % uuid, data=json.dumps({'code': statement}), contenttype=_JSON_CONTENT_TYPE)

  def fetch_data(self, session, statement):
    return self._root.get('sessions/%s/statements/%s' % (session, statement))

  def cancel(self, session):
    return self._root.post('sessions/%s/interrupt' % session)

  def close(self, uuid):
    return self._root.delete('sessions/%s' % uuid)

  def get_batches(self):
    return self._root.get('batches')

  def submit_batch(self, properties):
    properties['proxyUser'] = self.user
    return self._root.post('batches', data=json.dumps(properties), contenttype=_JSON_CONTENT_TYPE)

  def get_batch(self, uuid):
    return self._root.get('batches/%s' % uuid)

  def get_batch_status(self, uuid):
    return self._root.get('batches/%s/state' % uuid)['state']

  def get_batch_log(self, uuid, startFrom=None, size=None):
    """Return the batch log as a single newline-joined string."""
    response = self._root.get('batches/%s/log' % uuid, params=self._window_params(startFrom, size))
    return '\n'.join(response['log'])

  def close_batch(self, uuid):
    return self._root.delete('batches/%s' % uuid)
class MapreduceApi(object):
  """REST client for the MapReduce AM web services, reached via the RM proxy."""

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(oozie_url, 'proxy')
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    if self._security_enabled:
      self._client.set_kerberos_auth()
    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "MapreduceApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  def _job_get(self, job_id, suffix='', **tokens):
    """GET a MapReduce job sub-resource through the RM proxy.

    `suffix` is appended to the base job URL and may contain %(...)s tokens
    resolved from `tokens` (e.g. task_id, attempt_id). The application id is
    derived from the job id.
    """
    app_id = job_id.replace('job', 'application')
    values = {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}
    values.update(tokens)
    url = '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s' + suffix
    return self._root.get(url % values, headers={'Accept': _JSON_CONTENT_TYPE})

  def job(self, user, job_id):
    # `user` is unused but kept for interface compatibility with callers.
    return self._job_get(job_id)

  def counters(self, job_id):
    response = self._job_get(job_id, '/counters')
    # If it hits the job history server, it will return HTML.
    # Simply return None in this case because there isn't much data there.
    if isinstance(response, basestring):
      return None
    else:
      return response

  def tasks(self, job_id):
    return self._job_get(job_id, '/tasks')

  def job_attempts(self, job_id):
    return self._job_get(job_id, '/jobattempts')

  def conf(self, job_id):
    return self._job_get(job_id, '/conf')

  def task(self, job_id, task_id):
    return self._job_get(job_id, '/tasks/%(task_id)s', task_id=task_id)

  def task_counters(self, job_id, task_id):
    # Normalize in case an application id was passed instead of a job id
    # (mirrors the original app_id/job_id double-replace).
    job_id = job_id.replace('application', 'job')
    return self._job_get(job_id, '/tasks/%(task_id)s/counters', task_id=task_id)

  def task_attempts(self, job_id, task_id):
    return self._job_get(job_id, '/tasks/%(task_id)s/attempts', task_id=task_id)

  def task_attempt(self, job_id, task_id, attempt_id):
    job_id = job_id.replace('application', 'job')
    return self._job_get(job_id, '/tasks/%(task_id)s/attempts/%(attempt_id)s', task_id=task_id, attempt_id=attempt_id)

  def kill(self, job_id):
    app_id = job_id.replace('job', 'application')
    get_resource_manager().kill(app_id)  # We need to call the RM
except (KeyError, RestException), e: raise KeyError(_("Cannot find job attempt '%(id)s'.") % {'id': job.jobId}, e) except Exception, e: raise Exception(_("Failed to get application for job %s: %s") % (job.jobId, e)) if log_link: link = '/%s/' % name params = {} if offset != 0: params['start'] = offset root = Resource(get_log_client(log_link), urlparse.urlsplit(log_link)[2], urlencode=False) api_resp = None try: api_resp = root.get(link, params=params) log = html.fromstring(api_resp, parser=html.HTMLParser()).xpath('/html/body/table/tbody/tr/td[2]')[0].text_content() response['status'] = 0 response['log'] = LinkJobLogs._make_hdfs_links(log) except Exception, e: response['log'] = _('Failed to retrieve log: %s' % e) try: debug_info = '\nLog Link: %s' % log_link if api_resp: debug_info += '\nHTML Response: %s' % response response['debug'] = debug_info LOG.error(debug_info) except: LOG.exception('failed to create debug info')
class OozieApi(object):
  """REST client for the Oozie server (workflows, coordinators, bundles, admin).

  This variant additionally accepts the 'update' job-control action.
  """

  def __init__(self, oozie_url, user, security_enabled=False, api_version=API_VERSION, ssl_cert_ca_verify=True):
    self._url = posixpath.join(oozie_url, api_version)
    self._client = HttpClient(self._url, logger=LOG)
    if security_enabled:
      self._client.set_kerberos_auth()
    self._client.set_verify(ssl_cert_ca_verify)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    # To store username info
    if hasattr(user, 'username'):
      self.user = user.username
    else:
      self.user = user
    self.api_version = api_version

  def __str__(self):
    return "OozieApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def _get_params(self):
    """Common request params: impersonation (doAs) and timezone."""
    if self.security_enabled:
      return {'doAs': self.user, 'timezone': TIME_ZONE.get()}
    return {'user.name': DEFAULT_USER, 'doAs': self.user, 'timezone': TIME_ZONE.get()}

  def _get_oozie_properties(self, properties=None):
    """Merge caller properties over the default user.name property."""
    defaults = {
      'user.name': self.user,
    }
    if properties is not None:
      defaults.update(properties)
    return defaults

  VALID_JOB_FILTERS = ('name', 'user', 'group', 'status', 'startcreatedtime')
  # Set literal for consistency with the sibling OozieApi definition.
  VALID_LOG_FILTERS = {'recent', 'limit', 'loglevel', 'text'}

  def get_jobs(self, jobtype, offset=None, cnt=None, filters=None):
    """
    Get a list of Oozie jobs.

    Note that offset is 1-based. `filters` is an iterable of (key, value)
    pairs and each key must be one of VALID_JOB_FILTERS.
    """
    params = self._get_params()
    if offset is not None:
      params['offset'] = str(offset)
    if cnt is not None:
      params['len'] = str(cnt)
    if filters is None:
      filters = []
    params['jobtype'] = jobtype

    filter_list = []
    for key, val in filters:
      if key not in OozieApi.VALID_JOB_FILTERS:
        raise ValueError('"%s" is not a valid filter for selecting jobs' % (key,))
      filter_list.append('%s=%s' % (key, val))
    params['filter'] = ';'.join(filter_list)

    # Send the request
    resp = self._root.get('jobs', params)
    if jobtype == 'wf':
      wf_list = WorkflowList(self, resp, filters=filters)
    elif jobtype == 'coord':
      wf_list = CoordinatorList(self, resp, filters=filters)
    else:
      wf_list = BundleList(self, resp, filters=filters)
    return wf_list

  def get_workflows(self, offset=None, cnt=None, filters=None):
    return self.get_jobs('wf', offset, cnt, filters)

  def get_coordinators(self, offset=None, cnt=None, filters=None):
    return self.get_jobs('coord', offset, cnt, filters)

  def get_bundles(self, offset=None, cnt=None, filters=None):
    return self.get_jobs('bundle', offset, cnt, filters)

  # TODO: make get_job accept any jobid
  def get_job(self, jobid):
    """
    get_job(jobid) -> Workflow
    """
    params = self._get_params()
    resp = self._root.get('job/%s' % (jobid,), params)
    wf = Workflow(self, resp)
    return wf

  def get_coordinator(self, jobid, offset=None, cnt=None, filters=None):
    params = self._get_params()
    if offset is not None:
      params['offset'] = str(offset)
    if cnt is not None:
      params['len'] = str(cnt)
    if filters is None:
      filters = {}
    params.update({'order': 'desc'})

    filter_list = []
    for key, val in filters:
      if key not in OozieApi.VALID_JOB_FILTERS:
        raise ValueError('"%s" is not a valid filter for selecting jobs' % (key,))
      filter_list.append('%s=%s' % (key, val))
    params['filter'] = ';'.join(filter_list)

    resp = self._root.get('job/%s' % (jobid,), params)
    return Coordinator(self, resp)

  def get_bundle(self, jobid):
    params = self._get_params()
    resp = self._root.get('job/%s' % (jobid,), params)
    return Bundle(self, resp)

  def get_job_definition(self, jobid):
    """
    get_job_definition(jobid) -> Definition (xml string)
    """
    params = self._get_params()
    params['show'] = 'definition'
    return self._root.get('job/%s' % (jobid,), params)

  def get_job_log(self, jobid, logfilter=None):
    """
    get_job_log(jobid) -> Log (xml string)
    """
    params = self._get_params()
    params['show'] = 'log'

    filter_list = []
    if logfilter is None:
      logfilter = []
    for key, val in logfilter:
      if key not in OozieApi.VALID_LOG_FILTERS:
        raise ValueError('"%s" is not a valid filter for job logs' % (key,))
      filter_list.append('%s=%s' % (key, val))
    params['logfilter'] = ';'.join(filter_list)
    return self._root.get('job/%s' % (jobid,), params)

  def get_job_status(self, jobid):
    params = self._get_params()
    params['show'] = 'status'
    xml = self._root.get('job/%s' % (jobid,), params)
    return xml

  def get_action(self, action_id):
    # The action id format encodes the job type: C@ = coordinator, B@ = bundle.
    if 'C@' in action_id:
      Klass = CoordinatorAction
    elif 'B@' in action_id:
      Klass = BundleAction
    else:
      Klass = WorkflowAction
    params = self._get_params()
    resp = self._root.get('job/%s' % (action_id,), params)
    return Klass(resp)

  def job_control(self, jobid, action, properties=None, parameters=None):
    """
    job_control(jobid, action) -> None
    Raise RestException on error.
    """
    if action not in ('start', 'suspend', 'resume', 'kill', 'rerun', 'coord-rerun', 'bundle-rerun', 'change', 'ignore', 'update'):
      msg = 'Invalid oozie job action: %s' % (action,)
      LOG.error(msg)
      raise ValueError(msg)
    properties = self._get_oozie_properties(properties)
    params = self._get_params()
    params['action'] = action
    if parameters is not None:
      params.update(parameters)
    return self._root.put('job/%s' % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def submit_workflow(self, application_path, properties=None):
    """
    submit_workflow(application_path, properties=None) -> jobid
    Raise RestException on error.
    """
    defaults = {
      'oozie.wf.application.path': application_path,
      'user.name': self.user,
    }
    if properties is not None:
      defaults.update(properties)
    properties = defaults
    return self.submit_job(properties)

  # Is name actually submit_coord?
  def submit_job(self, properties=None):
    """
    submit_job(properties=None, id=None) -> jobid
    Raise RestException on error.
    """
    defaults = {
      'user.name': self.user,
    }
    if properties is not None:
      defaults.update(properties)
    properties = defaults
    params = self._get_params()
    resp = self._root.post('jobs', params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)
    return resp['id']

  def dryrun(self, properties=None):
    defaults = {
      'user.name': self.user,
    }
    if properties is not None:
      defaults.update(properties)
    properties = defaults
    params = self._get_params()
    params['action'] = 'dryrun'
    return self._root.post('jobs', params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def rerun(self, jobid, properties=None, params=None):
    properties = self._get_oozie_properties(properties)
    # BUG FIX: the caller-supplied params must be merged over the defaults.
    # Previously `self._get_params().update(params)` built the defaults,
    # merged into them, and discarded the result — a silent no-op.
    if params is None:
      params = self._get_params()
    else:
      defaults = self._get_params()
      defaults.update(params)
      params = defaults
    params['action'] = 'rerun'
    return self._root.put('job/%s' % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def get_build_version(self):
    """
    get_build_version() -> Build version (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/build-version', params)
    return resp

  def get_instrumentation(self):
    params = self._get_params()
    resp = self._root.get('admin/instrumentation', params)
    return resp

  def get_metrics(self):
    params = self._get_params()
    resp = self._root.get('admin/metrics', params)
    return resp

  def get_configuration(self):
    """
    get_configuration() -> Oozie config (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/configuration', params)
    return resp

  def get_oozie_status(self):
    """
    get_oozie_status() -> Oozie status (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/status', params)
    return resp

  def get_oozie_slas(self, **kwargs):
    """
    filter=
      app_name=my-sla-app
      id=0000002-131206135002457-oozie-oozi-W
      nominal_start=2013-06-18T00:01Z
      nominal_end=2013-06-23T00:01Z
    """
    params = self._get_params()
    params['filter'] = ';'.join(['%s=%s' % (key, val) for key, val in kwargs.iteritems()])
    resp = self._root.get('sla', params)
    return resp['slaSummaryList']
class MapreduceApi(object):
  """
  Client for the MapReduce REST API, reached through the YARN proxy.

  Every endpoint URL is prefixed with the YARN application id derived from
  the MR job id. Previously only some methods normalized between the
  'job_...' and 'application_...' id forms; all methods now accept either
  form via _get_ids().
  """

  def __init__(self, oozie_url):
    self._url = posixpath.join(oozie_url, 'proxy')
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = False

  def __str__(self):
    return "MapreduceApi at %s" % (self._url, )

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    # Exposed for parity with the sibling API clients in this module.
    return self._security_enabled

  @staticmethod
  def _get_ids(job_id):
    """Return (app_id, job_id), accepting either a 'job_...' or 'application_...' id."""
    return job_id.replace('job', 'application'), job_id.replace('application', 'job')

  def _mr_path(self, job_id, suffix=''):
    """Build the proxied MR REST path for `job_id`, with an optional trailing `suffix`."""
    app_id, job_id = self._get_ids(job_id)
    return '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s%(suffix)s' % {
      'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION, 'suffix': suffix
    }

  def job(self, user, job_id):
    # `user` is unused but kept for interface compatibility with callers.
    return self._root.get(self._mr_path(job_id), headers={'Accept': _JSON_CONTENT_TYPE})

  def counters(self, job_id):
    return self._root.get(self._mr_path(job_id, '/counters'), headers={'Accept': _JSON_CONTENT_TYPE})

  def tasks(self, job_id):
    return self._root.get(self._mr_path(job_id, '/tasks'), headers={'Accept': _JSON_CONTENT_TYPE})

  def job_attempts(self, job_id):
    return self._root.get(self._mr_path(job_id, '/jobattempts'), headers={'Accept': _JSON_CONTENT_TYPE})

  def conf(self, job_id):
    return self._root.get(self._mr_path(job_id, '/conf'), headers={'Accept': _JSON_CONTENT_TYPE})

  def task(self, job_id, task_id):
    return self._root.get(self._mr_path(job_id, '/tasks/%s' % task_id), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_counters(self, job_id, task_id):
    return self._root.get(self._mr_path(job_id, '/tasks/%s/counters' % task_id), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempts(self, job_id, task_id):
    return self._root.get(self._mr_path(job_id, '/tasks/%s/attempts' % task_id), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempt(self, job_id, task_id, attempt_id):
    return self._root.get(self._mr_path(job_id, '/tasks/%s/attempts/%s' % (task_id, attempt_id)), headers={'Accept': _JSON_CONTENT_TYPE})