class SparkHistoryServerApi(object):
    """JSON client for the Spark History Server REST API.

    Requests go through an HttpClient rooted at
    ``<spark_hs_url>/api/<version>/``; Kerberos auth is enabled when the
    cluster is secured.
    """

    def __init__(self,
                 spark_hs_url,
                 security_enabled=False,
                 ssl_cert_ca_verify=False):
        self._ui_url = spark_hs_url
        self._url = posixpath.join(spark_hs_url, 'api/%s/' % _API_VERSION)
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)
        self._security_enabled = security_enabled

        if security_enabled:
            self._client.set_kerberos_auth()
        self._client.set_verify(ssl_cert_ca_verify)

    def __str__(self):
        return "Spark History Server API at %s" % (self._url,)

    @property
    def url(self):
        return self._url

    @property
    def ui_url(self):
        return self._ui_url

    @property
    def headers(self):
        # Every endpoint is requested as JSON.
        return {'Accept': _JSON_CONTENT_TYPE}

    def applications(self):
        return self._root.get('applications', headers=self.headers)

    def application(self, app_id):
        return self._root.get('applications/%s' % app_id, headers=self.headers)

    def jobs(self, app_id, attempt_id):
        return self._root.get(
            'applications/%s/%s/jobs' % (app_id, attempt_id),
            headers=self.headers)

    def stages(self, app_id, attempt_id):
        return self._root.get(
            'applications/%s/%s/stages' % (app_id, attempt_id),
            headers=self.headers)

    def executors(self, app_id, attempt_id):
        return self._root.get(
            'applications/%s/%s/executors' % (app_id, attempt_id),
            headers=self.headers)
Exemple #2
0
class OptimizerApi(object):
    """Client for the Optimizer REST API, configured from OPTIMIZER.* settings."""

    def __init__(self,
                 api_url=None,
                 product_name=None,
                 product_secret=None,
                 ssl_cert_ca_verify=OPTIMIZER.SSL_CERT_CA_VERIFY.get()):
        # Each argument falls back to its configured value when not given.
        self._api_url = (api_url or OPTIMIZER.API_URL.get()).strip('/')
        self._product_name = product_name if product_name else OPTIMIZER.PRODUCT_NAME.get()
        self._product_secret = product_secret if product_secret else OPTIMIZER.PRODUCT_SECRET.get()

        self._client = HttpClient(self._api_url, logger=LOG)
        self._client.set_verify(ssl_cert_ca_verify)

        self._root = resource.Resource(self._client)

    def create_product(self, product_name, product_secret):
        """Register a new product with the Optimizer service.

        Raises:
            PopupException: wraps any REST error from the service.
        """
        try:
            data = {
                'productName': product_name,
                'productSecret': product_secret,
                'authCode': ''
            }
            return self._root.post('/api/createProduct', data)
        except RestException as e:  # was Py2-only `except X, e` — invalid in Py3
            raise PopupException(e, title=_('Error while accessing Optimizer'))
Exemple #3
0
class ResourceManagerApi(object):
    """Client for the YARN ResourceManager web services.

    All calls are routed through `_execute`, which detects the standby-RM
    HTML notice (YARN-2605) and raises instead of returning it.
    """

    def __init__(self,
                 oozie_url,
                 security_enabled=False,
                 ssl_cert_ca_verify=False):
        self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)
        self._security_enabled = security_enabled

        if security_enabled:
            self._client.set_kerberos_auth()
        self._client.set_verify(ssl_cert_ca_verify)

    def __str__(self):
        return "ResourceManagerApi at %s" % (self._url,)

    @property
    def url(self):
        return self._url

    @property
    def security_enabled(self):
        return self._security_enabled

    def cluster(self, **kwargs):
        return self._get('cluster', params=kwargs)

    def apps(self, **kwargs):
        return self._get('cluster/apps', params=kwargs)

    def app(self, app_id):
        return self._get('cluster/apps/%s' % app_id)

    def kill(self, app_id):
        return self._execute(self._root.put,
                             'cluster/apps/%s/state' % app_id,
                             data=json.dumps({'state': 'KILLED'}),
                             contenttype=_JSON_CONTENT_TYPE)

    def _get(self, path, **kwargs):
        # All GET endpoints share the JSON Accept header.
        return self._execute(self._root.get, path,
                             headers={'Accept': _JSON_CONTENT_TYPE}, **kwargs)

    def _execute(self, function, *args, **kwargs):
        response = function(*args, **kwargs)

        # YARN-2605: the standby RM replies with an HTML notice instead of a
        # proper HTTP redirect when failover has occurred.
        if isinstance(response, str) and response.startswith(
                'This is standby RM. Redirecting to the current active RM'):
            raise YarnFailoverOccurred(response)

        return response
Exemple #4
0
class THttpClient(TTransportBase):
    """
  HTTP transport mode for Thrift.

  HTTPS and Kerberos support with Request.

  e.g.
  mode = THttpClient('http://hbase-thrift-v1.com:9090')
  mode = THttpClient('http://hive-localhost:10001/cliservice')
  """

    def __init__(self, base_url):
        self._base_url = base_url
        self._client = HttpClient(self._base_url, logger=LOG)
        self._data = None  # last response payload, served back by read()
        self._headers = None
        self._wbuf = buffer_writer()  # accumulates outgoing Thrift bytes

    def open(self):
        # The underlying HttpClient is connectionless; nothing to open.
        pass

    def set_kerberos_auth(self, service="HTTP"):
        self._client.set_kerberos_auth(service=service)

    def set_basic_auth(self, username, password):
        self._client.set_basic_auth(username, password)

    def set_bearer_auth(self, token):
        self._client.set_bearer_auth(token)

    def set_verify(self, verify=True):
        self._client.set_verify(verify)

    def close(self):
        # Close session too?
        self._headers = None

    def isOpen(self):
        return self._client is not None

    def setTimeout(self, ms):
        if not self._headers:
            self._headers = {}
        # The timeout is conveyed to the client as a header, in whole seconds.
        self._headers.update(timeout=str(int(ms / 1000)))

    def setCustomHeaders(self, headers):
        self._headers = headers

    def read(self, sz):
        return self._data

    def write(self, buf):
        self._wbuf.write(buf)

    def flush(self):
        payload = self._wbuf.getvalue()
        self._wbuf = buffer_writer()  # reset for the next message

        # POST the buffered request body in one shot; response is kept for read().
        self._root = Resource(self._client)
        self._data = self._root.post('', data=payload, headers=self._headers)
Exemple #5
0
class NodeManagerApi(object):
  """Client for the YARN NodeManager web services (container information)."""

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=True):
    self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if security_enabled:
      self._client.set_kerberos_auth()
    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "NodeManagerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def containers(self):
    return self._root.get('node/containers', headers=self._json_headers())

  def container(self, container_id):
    return self._root.get('node/containers/%s' % container_id, headers=self._json_headers())

  def _json_headers(self):
    # Every endpoint is requested as JSON.
    return {'Accept': _JSON_CONTENT_TYPE}
Exemple #6
0
class PrometheusApi(object):
  """Thin client for the Prometheus HTTP query API."""

  def __init__(self, user=None, ssl_cert_ca_verify=False):
    self._api_url = '%s/%s' % (PROMETHEUS.API_URL.get().strip('/'), VERSION)

    self.user = user
    self._client = HttpClient(self._api_url, logger=LOG)
    self._client.set_verify(ssl_cert_ca_verify)
    self._root = Resource(self._client)

  def query(self, query):
    """Run an instant query and return the 'data' part of the response."""
    params = {'query': query}
    try:
      return self._root.get('query', params)['data']
    except RestException as e:
      raise PrometheusApiException(e)

  def range_query(self, query, start, end, step):
    """Run a range query and return its 'data' payload, e.g.
    /api/v1/query_range?query=up&start=...&end=...&step=15s
    """
    params = {'query': query, 'start': start, 'end': end, 'step': step}
    try:
      return self._root.get('query_range', params)['data']
    except RestException as e:
      raise PrometheusApiException(e)
Exemple #7
0
class ManagerApi(object):
    """
  https://cloudera.github.io/cm_api/
  """

    def __init__(self,
                 user=None,
                 security_enabled=False,
                 ssl_cert_ca_verify=False):
        self._api_url = '%s/%s' % (MANAGER.API_URL.get().strip('/'), VERSION)
        self._username = get_navigator_auth_username()
        self._password = get_navigator_auth_password()

        self.user = user
        self._client = HttpClient(self._api_url, logger=LOG)

        # Kerberos when cluster security is on, otherwise HTTP basic auth.
        if security_enabled:
            self._client.set_kerberos_auth()
        else:
            self._client.set_basic_auth(self._username, self._password)

        self._client.set_verify(ssl_cert_ca_verify)
        self._root = Resource(self._client)

    def has_service(self, service_name, cluster_name=None):
        """Return True if `service_name` is among the cluster's service types.

        Raises:
            ManagerApiException: wraps any REST error.
        """
        cluster = self._get_cluster(cluster_name)
        try:
            services = self._root.get(
                'clusters/%(cluster_name)s/serviceTypes' % {
                    'cluster_name': cluster['name'],
                    'service_name': service_name
                })['items']

            return service_name in services
        except RestException as e:  # was Py2-only `except X, e` — invalid in Py3
            raise ManagerApiException(e)
Exemple #8
0
class OptimizerApi(object):
  """Optimizer REST client; every credential defaults to its OPTIMIZER.* setting."""

  def __init__(self, api_url=None, product_name=None, product_secret=None, ssl_cert_ca_verify=OPTIMIZER.SSL_CERT_CA_VERIFY.get(), product_auth_secret=None):
    self._api_url = (api_url or OPTIMIZER.API_URL.get()).strip('/')
    self._product_name = product_name if product_name else OPTIMIZER.PRODUCT_NAME.get()
    self._product_secret = product_secret if product_secret else OPTIMIZER.PRODUCT_SECRET.get()
    self._product_auth_secret = product_auth_secret if product_auth_secret else OPTIMIZER.PRODUCT_AUTH_SECRET.get()
    self._email = OPTIMIZER.EMAIL.get()
    self._email_password = OPTIMIZER.EMAIL_PASSWORD.get()

    self._client = HttpClient(self._api_url, logger=LOG)
    self._client.set_verify(ssl_cert_ca_verify)

    self._root = resource.Resource(self._client)

  def create_product(self, product_name=None, product_secret=None, authCode=None):
    """Register a product, defaulting each credential to the configured one.

    Raises:
      PopupException: wraps any REST error from the service.
    """
    try:
      data = {
          'productName': product_name if product_name is not None else self._product_name,
          'productSecret': product_secret if product_secret is not None else self._product_secret,
          'authCode': authCode if authCode is not None else self._product_auth_secret
      }
      return self._root.post('/api/createProduct', data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE)
    except RestException as e:  # was Py2-only `except X, e` — invalid in Py3
      raise PopupException(e, title=_('Error while accessing Optimizer'))
class NodeManagerApi(object):
    """Client for the YARN NodeManager container endpoints."""

    def __init__(self,
                 oozie_url,
                 security_enabled=False,
                 ssl_cert_ca_verify=True):
        self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)
        self._security_enabled = security_enabled

        if security_enabled:
            self._client.set_kerberos_auth()
        self._client.set_verify(ssl_cert_ca_verify)

    def __str__(self):
        return "NodeManagerApi at %s" % (self._url,)

    @property
    def url(self):
        return self._url

    @property
    def security_enabled(self):
        return self._security_enabled

    def containers(self):
        return self._root.get('node/containers',
                              headers={'Accept': _JSON_CONTENT_TYPE})

    def container(self, container_id):
        return self._root.get('node/containers/%s' % container_id,
                              headers={'Accept': _JSON_CONTENT_TYPE})
Exemple #10
0
class ManagerApi(object):
    """
  https://cloudera.github.io/cm_api/
  """

    def __init__(self,
                 user=None,
                 security_enabled=False,
                 ssl_cert_ca_verify=False):
        self._api_url = '%s/%s' % (MANAGER.API_URL.get().strip('/'), VERSION)
        self._username = get_navigator_auth_username()
        self._password = get_navigator_auth_password()

        self.user = user
        self._client = HttpClient(self._api_url, logger=LOG)

        # Kerberos when cluster security is on, otherwise HTTP basic auth.
        if security_enabled:
            self._client.set_kerberos_auth()
        else:
            self._client.set_basic_auth(self._username, self._password)

        self._client.set_verify(ssl_cert_ca_verify)
        self._root = Resource(self._client)

    def tools_echo(self):
        """Ping Cloudera Manager via its tools/echo endpoint.

        Raises:
            ManagerApiException: wraps any REST error.
        """
        try:
            params = (('message', 'hello'), )

            LOG.info(params)
            return self._root.get('tools/echo', params=params)
        except RestException as e:  # was Py2-only `except X, e` — invalid in Py3
            raise ManagerApiException(e)
class ResourceManagerApi(object):
  """YARN ResourceManager REST client.

  NOTE(review): unlike the sibling clients, SSL verification is enabled only
  when kerberos security is on AND ssl_cert_ca_verify is set — preserved
  as-is; confirm whether the asymmetry is intended.
  """

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    self._ssl_cert_ca_verify = ssl_cert_ca_verify

    if security_enabled:
      self._client.set_kerberos_auth()
      if ssl_cert_ca_verify:
        self._client.set_verify(True)

  def __str__(self):
    return "ResourceManagerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def _get_json(self, path, **kwargs):
    # Shared GET with the JSON Accept header.
    return self._root.get(path, headers={'Accept': _JSON_CONTENT_TYPE}, **kwargs)

  def cluster(self, **kwargs):
    return self._get_json('cluster', params=kwargs)

  def apps(self, **kwargs):
    return self._get_json('cluster/apps', params=kwargs)

  def app(self, app_id):
    return self._get_json('cluster/apps/%s' % app_id)

  def kill(self, app_id):
    return self._root.put('cluster/apps/%s/state' % app_id, data=json.dumps({'state': 'KILLED'}), contenttype=_JSON_CONTENT_TYPE)
Exemple #12
0
class ResourceManagerApi(object):
  """Read and kill YARN applications via the ResourceManager REST API."""

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "ResourceManagerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def cluster(self, **kwargs):
    headers = {'Accept': _JSON_CONTENT_TYPE}
    return self._root.get('cluster', params=kwargs, headers=headers)

  def apps(self, **kwargs):
    headers = {'Accept': _JSON_CONTENT_TYPE}
    return self._root.get('cluster/apps', params=kwargs, headers=headers)

  def app(self, app_id):
    headers = {'Accept': _JSON_CONTENT_TYPE}
    path = 'cluster/apps/%s' % app_id
    return self._root.get(path, headers=headers)

  def kill(self, app_id):
    path = 'cluster/apps/%s/state' % app_id
    body = json.dumps({'state': 'KILLED'})
    return self._root.put(path, data=body, contenttype=_JSON_CONTENT_TYPE)
Exemple #13
0
class OptimizerApi(object):
  """Optimizer REST client with lazy token-based authentication."""

  def __init__(self, api_url=None, product_name=None, product_secret=None, ssl_cert_ca_verify=OPTIMIZER.SSL_CERT_CA_VERIFY.get(), product_auth_secret=None):
    self._api_url = (api_url or get_optimizer_url()).strip('/')
    self._product_name = product_name if product_name else OPTIMIZER.PRODUCT_NAME.get()
    self._product_secret = product_secret if product_secret else OPTIMIZER.PRODUCT_SECRET.get()
    self._product_auth_secret = product_auth_secret if product_auth_secret else OPTIMIZER.PRODUCT_AUTH_SECRET.get()
    self._email = OPTIMIZER.EMAIL.get()
    self._email_password = OPTIMIZER.EMAIL_PASSWORD.get()

    self._client = HttpClient(self._api_url, logger=LOG)
    self._client.set_verify(ssl_cert_ca_verify)

    self._root = resource.Resource(self._client)
    self._token = None  # fetched lazily by _authenticate()


  def _authenticate(self, force=False):
    """Return a cached auth token, fetching it on first use or when forced."""
    if self._token is None or force:
      self._token = self.authenticate()['token']

    return self._token


  def create_product(self, product_name=None, product_secret=None, authCode=None):
    """Register a product, defaulting each credential to the configured one.

    Raises:
      PopupException: wraps any REST error from the service.
    """
    try:
      data = {
          'productName': product_name if product_name is not None else self._product_name,
          'productSecret': product_secret if product_secret is not None else self._product_secret,
          'authCode': authCode if authCode is not None else self._product_auth_secret
      }
      return self._root.post('/api/createProduct', data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE)
    except RestException as e:  # was Py2-only `except X, e` — invalid in Py3
      raise PopupException(e, title=_('Error while accessing Optimizer'))
Exemple #14
0
class THttpClient(TTransportBase):
  """
  HTTP transport mode for Thrift.

  HTTPS and Kerberos support with Request.

  e.g.
  mode = THttpClient('http://hbase-thrift-v1.com:9090')
  mode = THttpClient('http://hive-localhost:10001/cliservice')
  """

  def __init__(self, base_url):
    self._base_url = base_url
    self._client = HttpClient(self._base_url, logger=LOG)
    self._data = None  # last response payload, served back by read()
    self._headers = None
    self._wbuf = StringIO()  # accumulates outgoing Thrift bytes

  def open(self):
    # The underlying HttpClient is connectionless; nothing to open.
    pass

  def set_kerberos_auth(self):
    self._client.set_kerberos_auth()

  def set_basic_auth(self, username, password):
    self._client.set_basic_auth(username, password)

  def set_verify(self, verify=True):
    self._client.set_verify(verify)

  def close(self):
    # Close session too?
    self._headers = None

  def isOpen(self):
    return self._client is not None

  def setTimeout(self, ms):
    # Timeouts are not supported by this transport variant.
    pass

  def setCustomHeaders(self, headers):
    self._headers = headers

  def read(self, sz):
    return self._data

  def write(self, buf):
    self._wbuf.write(buf)

  def flush(self):
    payload = self._wbuf.getvalue()
    self._wbuf = StringIO()  # reset for the next message

    # POST the buffered request body in one shot; response is kept for read().
    self._root = Resource(self._client)
    self._data = self._root.post('', data=payload, headers=self._headers)
Exemple #15
0
class OptimizerApi(object):
    """Optimizer client that shells out to the `ccs navopt` CLI for commands."""

    def __init__(self,
                 api_url=None,
                 product_name=None,
                 product_secret=None,
                 ssl_cert_ca_verify=OPTIMIZER.SSL_CERT_CA_VERIFY.get(),
                 product_auth_secret=None):
        self._api_url = (api_url or get_optimizer_url()).strip('/')
        self._email = OPTIMIZER.EMAIL.get()
        self._email_password = OPTIMIZER.EMAIL_PASSWORD.get()
        self._product_secret = product_secret if product_secret else OPTIMIZER.PRODUCT_SECRET.get()
        self._product_auth_secret = product_auth_secret if product_auth_secret else OPTIMIZER.PRODUCT_AUTH_SECRET.get()
        # Aka "tenant": configured name, else looked up from the service.
        self._product_name = product_name if product_name else (
            OPTIMIZER.PRODUCT_NAME.get() or self.get_tenant()['tenant']
        )

        self._client = HttpClient(self._api_url, logger=LOG)
        self._client.set_verify(ssl_cert_ca_verify)

        self._root = resource.Resource(self._client)
        self._token = None  # fetched lazily by _authenticate()

    def _authenticate(self, force=False):
        """Return a cached auth token, fetching it on first use or when forced."""
        if self._token is None or force:
            self._token = self.authenticate()['token']

        return self._token

    def _exec(self, command, args):
        """Run a navopt CLI `command` with extra `args` and capture its output.

        Raises:
            OptimizerApiException: wraps any REST error.
        """
        data = None
        response = {'status': 'error'}

        try:
            cmd_args = [
                'ccs', 'navopt',
                '--endpoint-url=%s' % self._api_url, command
            ]
            if self._product_secret:
                cmd_args += ['--auth-config', self._product_secret]

            data = subprocess.check_output(cmd_args + args)
        except CalledProcessError as e:  # was Py2-only `except X, e` — invalid in Py3
            if command == 'upload' and e.returncode == 1:
                LOG.info(
                    'Upload command is successful despite return code of 1: %s'
                    % e.output)
                data = '\n'.join(
                    e.output.split('\n')[3:])  # Beware removing of {"url":...}
        except RestException as e:
            raise OptimizerApiException(
                e, title=_('Error while accessing Optimizer'))
Exemple #16
0
class HistoryServerApi(object):
  """Client for the MapReduce Job History Server REST API."""

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(oozie_url, 'ws/%s/history' % _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if security_enabled:
      self._client.set_kerberos_auth()
    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "HistoryServerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  def _get(self, path):
    # Every endpoint is fetched as JSON.
    return self._root.get(path, headers={'Accept': _JSON_CONTENT_TYPE})

  def job(self, user, job_id):
    return self._get('mapreduce/jobs/%s' % job_id)

  def counters(self, job_id):
    return self._get('mapreduce/jobs/%s/counters' % job_id)

  def conf(self, job_id):
    return self._get('mapreduce/jobs/%s/conf' % job_id)

  def job_attempts(self, job_id):
    return self._get('mapreduce/jobs/%s/jobattempts' % job_id)

  def tasks(self, job_id):
    return self._get('mapreduce/jobs/%s/tasks' % job_id)

  def task(self, job_id, task_id):
    return self._get('mapreduce/jobs/%s/tasks/%s' % (job_id, task_id))

  def task_attempts(self, job_id, task_id):
    return self._get('mapreduce/jobs/%s/tasks/%s/attempts' % (job_id, task_id))

  def task_counters(self, job_id, task_id):
    # The history server keys on job ids, not application ids.
    job_id = job_id.replace('application', 'job')
    return self._get('mapreduce/jobs/%s/tasks/%s/counters' % (job_id, task_id))

  def task_attempt(self, job_id, task_id, attempt_id):
    return self._get('mapreduce/jobs/%s/tasks/%s/attempts/%s' % (job_id, task_id, attempt_id))

  def task_attempt_counters(self, job_id, task_id, attempt_id):
    return self._get('mapreduce/jobs/%s/tasks/%s/attempts/%s/counters' % (job_id, task_id, attempt_id))
Exemple #17
0
def _create_query_store_client(request, content_type='application/json; charset=UTF-8'):
  """Build an HttpClient for the query store, impersonating the request user.

  Forwards the caller's cookie and username as headers; uses Kerberos when
  SASL is enabled. SSL verification is disabled.
  """
  client = HttpClient(QUERY_STORE.SERVER_URL.get())
  client.set_headers({
    'x-do-as': request.user.username,
    'X-Requested-By': 'das',
    'Content-Type': content_type,
    'Cookie': request.environ.get('HTTP_COOKIE'),
  })
  client.set_verify(False)

  if USE_SASL.get():
    client.set_kerberos_auth()

  return client
Exemple #18
0
class ResourceManagerApi(object):
  """ResourceManager REST client that detects standby-RM failover responses."""

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "ResourceManagerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def cluster(self, **kwargs):
    json_headers = {'Accept': _JSON_CONTENT_TYPE}
    return self._execute(self._root.get, 'cluster', params=kwargs, headers=json_headers)

  def apps(self, **kwargs):
    json_headers = {'Accept': _JSON_CONTENT_TYPE}
    return self._execute(self._root.get, 'cluster/apps', params=kwargs, headers=json_headers)

  def app(self, app_id):
    json_headers = {'Accept': _JSON_CONTENT_TYPE}
    return self._execute(self._root.get, 'cluster/apps/%s' % app_id, headers=json_headers)

  def kill(self, app_id):
    body = json.dumps({'state': 'KILLED'})
    return self._execute(self._root.put, 'cluster/apps/%s/state' % app_id, data=body, contenttype=_JSON_CONTENT_TYPE)

  def _execute(self, function, *args, **kwargs):
    result = function(*args, **kwargs)

    # YARN-2605: the standby RM replies with an HTML notice instead of a real
    # HTTP redirect after failover; surface that as an explicit error.
    if isinstance(result, str) and result.startswith('This is standby RM. Redirecting to the current active RM'):
      raise YarnFailoverOccurred(result)

    return result
class SparkHistoryServerApi(object):
  """JSON client for the Spark History Server REST API."""

  def __init__(self, spark_hs_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._ui_url = spark_hs_url
    self._url = posixpath.join(spark_hs_url, 'api/%s/' % _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "Spark History Server API at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def ui_url(self):
    return self._ui_url

  @property
  def headers(self):
    # Every endpoint is requested as JSON.
    return {'Accept': _JSON_CONTENT_TYPE}

  def applications(self):
    return self._root.get('applications', headers=self.headers)

  def application(self, app_id):
    return self._root.get('applications/%s' % app_id, headers=self.headers)

  def jobs(self, app_id, attempt_id):
    return self._root.get('applications/%s/%s/jobs' % (app_id, attempt_id), headers=self.headers)

  def stages(self, app_id, attempt_id):
    return self._root.get('applications/%s/%s/stages' % (app_id, attempt_id), headers=self.headers)

  def executors(self, app_id, attempt_id):
    return self._root.get('applications/%s/%s/executors' % (app_id, attempt_id), headers=self.headers)
class PrometheusApi(object):
  """Thin client for the Prometheus HTTP query API."""

  def __init__(self, user=None, ssl_cert_ca_verify=False):
    self._api_url = '%s/%s' % (PROMETHEUS.API_URL.get().strip('/'), VERSION)

    self.user = user
    self._client = HttpClient(self._api_url, logger=LOG)

    self._client.set_verify(ssl_cert_ca_verify)
    self._root = Resource(self._client)


  def query(self, query):
    """Run an instant query and return the 'data' part of the response.

    Raises:
      PrometheusApiException: wraps any REST error.
    """
    try:
      return self._root.get('query', {
        'query': query,
      })['data']
    except RestException as e:  # was Py2-only `except X, e` — invalid in Py3
      raise PrometheusApiException(e)
Exemple #21
0
class OptimizerApi(object):
  """Client for the Optimizer REST API, configured from OPTIMIZER.* settings."""

  def __init__(self, api_url=None, product_name=None, product_secret=None, ssl_cert_ca_verify=OPTIMIZER.SSL_CERT_CA_VERIFY.get()):
    # Each argument falls back to its configured value when not given.
    self._api_url = (api_url or OPTIMIZER.API_URL.get()).strip('/')
    self._product_name = product_name if product_name else OPTIMIZER.PRODUCT_NAME.get()
    self._product_secret = product_secret if product_secret else OPTIMIZER.PRODUCT_SECRET.get()

    self._client = HttpClient(self._api_url, logger=LOG)
    self._client.set_verify(ssl_cert_ca_verify)

    self._root = resource.Resource(self._client)

  def create_product(self, product_name, product_secret):
    """Register a new product with the Optimizer service.

    Raises:
      PopupException: wraps any REST error from the service.
    """
    try:
      data = {
          'productName': product_name,
          'productSecret': product_secret,
          'authCode': ''
      }
      return self._root.post('/api/createProduct', data)
    except RestException as e:  # was Py2-only `except X, e` — invalid in Py3
      raise PopupException(e, title=_('Error while accessing Optimizer'))
Exemple #22
0
class SparkJob(Application):
    """A YARN application presented as a Spark job.

    Resolves the real Spark UI tracking URL at construction time and, once
    the application has started, loads executor metrics from the history
    server API.
    """

    def __init__(self, job, rm_api=None, hs_api=None):
        super(SparkJob, self).__init__(job, rm_api)
        self._resolve_tracking_url()
        # Metrics only exist once the app has left the submission states.
        if self.status not in ('NEW', 'SUBMITTED', 'ACCEPTED') and hs_api:
            self.history_server_api = hs_api
            self._get_metrics()

    @property
    def logs_url(self):
        # Driver stdout link, when the history server provides one.
        log_links = self.history_server_api.get_executors_loglinks(self)
        return log_links[
            'stdout'] if log_links and 'stdout' in log_links else ''

    @property
    def attempt_id(self):
        # Last path component of the tracking URL.
        return self.trackingUrl.strip('/').split('/')[-1]

    def _resolve_tracking_url(self):
        # Follows the RM proxy redirect to find the actual Spark UI URL.
        resp = None
        try:
            self._client = HttpClient(self.trackingUrl, logger=LOG)
            self._root = Resource(self._client)
            yarn_cluster = cluster.get_cluster_conf_for_job_submission()
            self._security_enabled = yarn_cluster.SECURITY_ENABLED.get()
            if self._security_enabled:
                self._client.set_kerberos_auth()

            self._client.set_verify(yarn_cluster.SSL_CERT_CA_VERIFY.get())
            actual_url = self._execute(self._root.resolve_redirect_url)

            # Normalize ".../jobs" URLs back to the application root.
            if actual_url.strip('/').split('/')[-1] == 'jobs':
                actual_url = actual_url.strip('/').replace('jobs', '')
            self.trackingUrl = actual_url
            LOG.debug("SparkJob tracking URL: %s" % self.trackingUrl)
        except Exception, e:
            LOG.warn("Failed to resolve Spark Job's actual tracking URL: %s" %
                     e)
        finally:
            # NOTE(review): the `finally` suite is missing here — this block
            # appears truncated in the file; confirm against the original
            # source. Also note the Py2-only `except Exception, e` above.
Exemple #23
0
class SparkJob(Application):
  """A YARN application presented as a Spark job.

  Resolves the real Spark UI tracking URL at construction time and, once the
  application has started, loads executor metrics from the history server.
  """

  def __init__(self, job, rm_api=None, hs_api=None):
    super(SparkJob, self).__init__(job, rm_api)
    self._resolve_tracking_url()
    # Metrics only exist once the app has left the submission states.
    if self.status not in ('NEW', 'SUBMITTED', 'ACCEPTED') and hs_api:
      self.history_server_api = hs_api
      self._get_metrics()

  @property
  def logs_url(self):
    # Driver stdout link, when the history server provides one.
    log_links = self.history_server_api.get_executors_loglinks(self)
    return log_links['stdout'] if log_links and 'stdout' in log_links else ''

  @property
  def attempt_id(self):
    # Last path component of the tracking URL.
    return self.trackingUrl.strip('/').split('/')[-1]

  def _resolve_tracking_url(self):
    # Follows the RM proxy redirect to find the actual Spark UI URL.
    resp = None
    try:
      self._client = HttpClient(self.trackingUrl, logger=LOG)
      self._root = Resource(self._client)
      yarn_cluster = cluster.get_cluster_conf_for_job_submission()
      self._security_enabled = yarn_cluster.SECURITY_ENABLED.get()
      if self._security_enabled:
        self._client.set_kerberos_auth()

      self._client.set_verify(yarn_cluster.SSL_CERT_CA_VERIFY.get())
      actual_url = self._execute(self._root.resolve_redirect_url)

      # Normalize ".../jobs" URLs back to the application root.
      if actual_url.strip('/').split('/')[-1] == 'jobs':
        actual_url = actual_url.strip('/').replace('jobs', '')
      self.trackingUrl = actual_url
      LOG.debug("SparkJob tracking URL: %s" % self.trackingUrl)
    except Exception, e:
      LOG.warn("Failed to resolve Spark Job's actual tracking URL: %s" % e)
    finally:
      # NOTE(review): the `finally` suite is missing here — this block appears
      # truncated in the file; confirm against the original source. Also note
      # the Py2-only `except Exception, e` above.
class MapreduceApi(object):
    """REST client for the MapReduce application-master web-services API.

    Every request is routed through the ResourceManager web proxy, i.e.
    '<base_url>/proxy/<application-id>/ws/<version>/mapreduce/...'.
    Despite the parameter name, ``oozie_url`` is the proxy base URL.
    """
    def __init__(self,
                 oozie_url,
                 security_enabled=False,
                 ssl_cert_ca_verify=False):
        # All requests go through the RM proxy endpoint.
        self._url = posixpath.join(oozie_url, 'proxy')
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)
        self._security_enabled = security_enabled

        if self._security_enabled:
            self._client.set_kerberos_auth()

        self._client.set_verify(ssl_cert_ca_verify)

    def __str__(self):
        return "MapreduceApi at %s" % (self._url, )

    @property
    def url(self):
        return self._url

    def job(self, user, job_id):
        """Fetch a single MapReduce job.

        ``user`` is accepted for interface compatibility but unused here.
        """
        # 'job_...' -> 'application_...' for the proxy path prefix.
        app_id = job_id.replace('job', 'application')
        return self._root.get(
            '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s' % {
                'app_id': app_id,
                'job_id': job_id,
                'version': _API_VERSION
            },
            headers={'Accept': _JSON_CONTENT_TYPE})

    def counters(self, job_id):
        """Fetch the job's counters, or None if HTML was returned."""
        app_id = job_id.replace('job', 'application')
        response = self._root.get(
            '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/counters' % {
                'app_id': app_id,
                'job_id': job_id,
                'version': _API_VERSION
            },
            headers={'Accept': _JSON_CONTENT_TYPE})
        # If it hits the job history server, it will return HTML.
        # Simply return None in this case because there isn't much data there.
        if isinstance(response, basestring):
            return None
        else:
            return response

    def tasks(self, job_id):
        """List the tasks of a job."""
        app_id = job_id.replace('job', 'application')
        return self._root.get(
            '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks' % {
                'app_id': app_id,
                'job_id': job_id,
                'version': _API_VERSION
            },
            headers={'Accept': _JSON_CONTENT_TYPE})

    def job_attempts(self, job_id):
        """List the attempts of a job."""
        app_id = job_id.replace('job', 'application')
        return self._root.get(
            '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/jobattempts' %
            {
                'app_id': app_id,
                'job_id': job_id,
                'version': _API_VERSION
            },
            headers={'Accept': _JSON_CONTENT_TYPE})

    def conf(self, job_id):
        """Fetch the job's configuration."""
        app_id = job_id.replace('job', 'application')
        return self._root.get(
            '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/conf' % {
                'app_id': app_id,
                'job_id': job_id,
                'version': _API_VERSION
            },
            headers={'Accept': _JSON_CONTENT_TYPE})

    def task(self, job_id, task_id):
        """Fetch a single task of a job."""
        app_id = job_id.replace('job', 'application')
        return self._root.get(
            '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s'
            % {
                'app_id': app_id,
                'job_id': job_id,
                'task_id': task_id,
                'version': _API_VERSION
            },
            headers={'Accept': _JSON_CONTENT_TYPE})

    def task_counters(self, job_id, task_id):
        """Fetch the counters of a single task."""
        app_id = job_id.replace('job', 'application')
        # Normalize in case an application id was passed in (no-op otherwise).
        job_id = job_id.replace('application', 'job')
        return self._root.get(
            '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/counters'
            % {
                'app_id': app_id,
                'job_id': job_id,
                'task_id': task_id,
                'version': _API_VERSION
            },
            headers={'Accept': _JSON_CONTENT_TYPE})

    def task_attempts(self, job_id, task_id):
        """List the attempts of a single task."""
        app_id = job_id.replace('job', 'application')
        return self._root.get(
            '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts'
            % {
                'app_id': app_id,
                'job_id': job_id,
                'task_id': task_id,
                'version': _API_VERSION
            },
            headers={'Accept': _JSON_CONTENT_TYPE})

    def task_attempt(self, job_id, task_id, attempt_id):
        """Fetch a single task attempt."""
        app_id = job_id.replace('job', 'application')
        # Normalize in case an application id was passed in (no-op otherwise).
        job_id = job_id.replace('application', 'job')
        return self._root.get(
            '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s'
            % {
                'app_id': app_id,
                'job_id': job_id,
                'task_id': task_id,
                'attempt_id': attempt_id,
                'version': _API_VERSION
            },
            headers={'Accept': _JSON_CONTENT_TYPE})

    def kill(self, job_id):
        """Kill the job by killing its YARN application via the RM."""
        app_id = job_id.replace('job', 'application')
        get_resource_manager().kill(app_id)  # We need to call the RM
class MapreduceApi(object):
    """REST client for the MapReduce application-master web-services API,
    with per-thread user impersonation (``doAs``) support.

    Requests are routed through the ResourceManager web proxy, i.e.
    '<mr_url>/proxy/<application-id>/ws/<version>/mapreduce/...'.
    """
    def __init__(self,
                 mr_url,
                 security_enabled=False,
                 ssl_cert_ca_verify=False):
        self._url = posixpath.join(mr_url, 'proxy')
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)
        self._security_enabled = security_enabled
        self._thread_local = threading.local()  # To store user info

        if self._security_enabled:
            self._client.set_kerberos_auth()

        self._client.set_verify(ssl_cert_ca_verify)

    def __str__(self):
        return "MapreduceApi at %s" % (self._url, )

    def _get_params(self):
        """Build the impersonation query params for the current user."""
        params = {}

        if self.username != DEFAULT_USER.get():  # We impersonate if needed
            params['doAs'] = self.username
            if not self._security_enabled:
                params['user.name'] = DEFAULT_USER.get()

        return params

    @property
    def url(self):
        return self._url

    @property
    def username(self):
        """Thread-local user, falling back to the configured default user."""
        try:
            return self._thread_local.user
        except AttributeError:
            return DEFAULT_USER.get()

    def setuser(self, user):
        """Set the thread-local user; returns the previous value."""
        curr = self.username
        self._thread_local.user = user
        return curr

    def job(self, user, job_id):
        """Fetch a single MapReduce job.

        ``user`` is accepted for interface compatibility but unused here;
        impersonation uses the thread-local user instead.
        """
        # 'job_...' -> 'application_...' for the proxy path prefix.
        app_id = job_id.replace('job', 'application')
        return self._root.get(
            '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s' % {
                'app_id': app_id,
                'job_id': job_id,
                'version': _API_VERSION
            },
            params=self._get_params(),
            headers={'Accept': _JSON_CONTENT_TYPE})

    def counters(self, job_id):
        """Fetch the job's counters, or None if HTML was returned."""
        app_id = job_id.replace('job', 'application')
        response = self._root.get(
            '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/counters' % {
                'app_id': app_id,
                'job_id': job_id,
                'version': _API_VERSION
            },
            params=self._get_params(),
            headers={'Accept': _JSON_CONTENT_TYPE})
        # If it hits the job history server, it will return HTML.
        # Simply return None in this case because there isn't much data there.
        if isinstance(response, basestring):
            return None
        else:
            return response

    def tasks(self, job_id):
        """List the tasks of a job."""
        app_id = job_id.replace('job', 'application')
        return self._root.get(
            '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks' % {
                'app_id': app_id,
                'job_id': job_id,
                'version': _API_VERSION
            },
            params=self._get_params(),
            headers={'Accept': _JSON_CONTENT_TYPE})

    def job_attempts(self, job_id):
        """List the attempts of a job."""
        app_id = job_id.replace('job', 'application')
        return self._root.get(
            '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/jobattempts' %
            {
                'app_id': app_id,
                'job_id': job_id,
                'version': _API_VERSION
            },
            params=self._get_params(),
            headers={'Accept': _JSON_CONTENT_TYPE})

    def conf(self, job_id):
        """Fetch the job's configuration."""
        app_id = job_id.replace('job', 'application')
        return self._root.get(
            '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/conf' % {
                'app_id': app_id,
                'job_id': job_id,
                'version': _API_VERSION
            },
            params=self._get_params(),
            headers={'Accept': _JSON_CONTENT_TYPE})

    def task(self, job_id, task_id):
        """Fetch a single task of a job."""
        app_id = job_id.replace('job', 'application')
        return self._root.get(
            '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s'
            % {
                'app_id': app_id,
                'job_id': job_id,
                'task_id': task_id,
                'version': _API_VERSION
            },
            params=self._get_params(),
            headers={'Accept': _JSON_CONTENT_TYPE})

    def task_counters(self, job_id, task_id):
        """Fetch the counters of a single task."""
        app_id = job_id.replace('job', 'application')
        # Normalize in case an application id was passed in (no-op otherwise).
        job_id = job_id.replace('application', 'job')
        return self._root.get(
            '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/counters'
            % {
                'app_id': app_id,
                'job_id': job_id,
                'task_id': task_id,
                'version': _API_VERSION
            },
            params=self._get_params(),
            headers={'Accept': _JSON_CONTENT_TYPE})

    def task_attempts(self, job_id, task_id):
        """List the attempts of a single task."""
        app_id = job_id.replace('job', 'application')
        return self._root.get(
            '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts'
            % {
                'app_id': app_id,
                'job_id': job_id,
                'task_id': task_id,
                'version': _API_VERSION
            },
            params=self._get_params(),
            headers={'Accept': _JSON_CONTENT_TYPE})

    def task_attempt(self, job_id, task_id, attempt_id):
        """Fetch a single task attempt."""
        app_id = job_id.replace('job', 'application')
        # Normalize in case an application id was passed in (no-op otherwise).
        job_id = job_id.replace('application', 'job')
        return self._root.get(
            '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s'
            % {
                'app_id': app_id,
                'job_id': job_id,
                'task_id': task_id,
                'attempt_id': attempt_id,
                'version': _API_VERSION
            },
            params=self._get_params(),
            headers={'Accept': _JSON_CONTENT_TYPE})

    def task_attempt_counters(self, job_id, task_id, attempt_id):
        """Fetch the counters of a single task attempt."""
        app_id = job_id.replace('job', 'application')
        # Normalize in case an application id was passed in (no-op otherwise).
        job_id = job_id.replace('application', 'job')
        return self._root.get(
            '%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s/counters'
            % {
                'app_id': app_id,
                'job_id': job_id,
                'task_id': task_id,
                'attempt_id': attempt_id,
                'version': _API_VERSION
            },
            params=self._get_params(),
            headers={'Accept': _JSON_CONTENT_TYPE})

    def kill(self, job_id):
        """Kill the job by killing its YARN application via the RM."""
        app_id = job_id.replace('job', 'application')
        get_resource_manager(self.username).kill(
            app_id)  # We need to call the RM
Exemple #26
0
class ManagerApi(object):
    """
  Client for the Cloudera Manager REST API.

  https://cloudera.github.io/cm_api/

  Authenticates with Kerberos when security is enabled, otherwise with
  the configured Navigator username/password over basic auth.
  """
    def __init__(self,
                 user=None,
                 security_enabled=False,
                 ssl_cert_ca_verify=False):
        self._api_url = '%s/%s' % (MANAGER.API_URL.get().strip('/'), VERSION)
        self._username = get_navigator_auth_username()
        self._password = get_navigator_auth_password()

        self.user = user
        self._client = HttpClient(self._api_url, logger=LOG)

        if security_enabled:
            self._client.set_kerberos_auth()
        else:
            self._client.set_basic_auth(self._username, self._password)

        self._client.set_verify(ssl_cert_ca_verify)
        self._root = Resource(self._client)

    def has_service(self, service_name, cluster_name=None):
        """Return True if ``service_name`` is among the cluster's service types."""
        cluster = self._get_cluster(cluster_name)
        try:
            # The extra 'service_name' key is ignored by %-dict formatting.
            services = self._root.get(
                'clusters/%(cluster_name)s/serviceTypes' % {
                    'cluster_name': cluster['name'],
                    'service_name': service_name
                })['items']

            return service_name in services
        except RestException as e:
            raise ManagerApiException(e)

    def get_spark_history_server_configs(self, cluster_name=None):
        """Locate the Spark History Server role and fetch its full config.

        Returns a (host_id, configs) tuple, or (None, None) on any failure
        (best-effort: errors are logged, not raised).
        """
        service_name = "SPARK_ON_YARN"
        shs_role_type = "SPARK_YARN_HISTORY_SERVER"

        try:
            cluster = self._get_cluster(cluster_name)
            services = self._root.get('clusters/%(cluster_name)s/services' % {
                'cluster_name': cluster['name'],
                'service_name': service_name
            })['items']

            service_display_names = [
                service['displayName'] for service in services
                if service['type'] == service_name
            ]

            if service_display_names:
                spark_service_display_name = service_display_names[0]

                servers = self._root.get(
                    'clusters/%(cluster_name)s/services/%(spark_service_display_name)s/roles'
                    % {
                        'cluster_name': cluster['name'],
                        'spark_service_display_name':
                        spark_service_display_name
                    })['items']

                shs_server_names = [
                    server['name'] for server in servers
                    if server['type'] == shs_role_type
                ]
                shs_server_name = shs_server_names[
                    0] if shs_server_names else None
                shs_server_hostRef = [
                    server['hostRef'] for server in servers
                    if server['type'] == shs_role_type
                ]
                shs_server_hostId = shs_server_hostRef[0][
                    'hostId'] if shs_server_hostRef else None

                if shs_server_name and shs_server_hostId:
                    # view=full includes every config entry, not just overrides.
                    shs_server_configs = self._root.get(
                        'clusters/%(cluster_name)s/services/%(spark_service_display_name)s/roles/%(shs_server_name)s/config'
                        % {
                            'cluster_name': cluster['name'],
                            'spark_service_display_name':
                            spark_service_display_name,
                            'shs_server_name': shs_server_name
                        },
                        params={'view': 'full'})['items']
                    return shs_server_hostId, shs_server_configs
        except Exception as e:
            LOG.warning("Check Spark History Server via ManagerApi: %s" % e)

        return None, None

    def get_spark_history_server_url(self, cluster_name=None):
        """Assemble the Spark History Server URL from CM configs, or None."""
        shs_server_hostId, shs_server_configs = self.get_spark_history_server_configs(
            cluster_name=cluster_name)

        if shs_server_hostId and shs_server_configs:
            shs_ui_port = None
            shs_ssl_port = None
            shs_ssl_enabled = None
            for config in shs_server_configs:
                if 'relatedName' in config and 'default' in config:
                    if config['relatedName'] == 'spark.history.ui.port':
                        shs_ui_port = config['default']
                    if config['relatedName'] == 'spark.ssl.historyServer.port':
                        shs_ssl_port = config['default']
                    if config[
                            'relatedName'] == 'spark.ssl.historyServer.enabled':
                        shs_ssl_enabled = config['default']
            shs_ui_host = self._root.get('hosts/%(hostId)s' %
                                         {'hostId': shs_server_hostId})
            shs_ui_hostname = shs_ui_host['hostname'] if shs_ui_host else None

            return self.assemble_shs_url(shs_ui_hostname, shs_ui_port,
                                         shs_ssl_port, shs_ssl_enabled)

        return None

    def get_spark_history_server_security_enabled(self, cluster_name=None):
        """Return True if the History Server has SPNEGO auth enabled."""
        shs_server_hostId, shs_server_configs = self.get_spark_history_server_configs(
            cluster_name=cluster_name)

        if shs_server_configs:
            for config in shs_server_configs:
                if 'relatedName' in config and 'default' in config and config[
                        'relatedName'] == 'history_server_spnego_enabled':
                    shs_security_enabled = config['default']
                    return shs_security_enabled and shs_security_enabled == 'true'

        return False

    def assemble_shs_url(self,
                         shs_ui_hostname,
                         shs_ui_port=None,
                         shs_ssl_port=None,
                         shs_ssl_enabled=None):
        """Build the History Server URL from its host/port/SSL configs.

        All four values must be truthy (shs_ssl_enabled is the string
        'true'/'false' from CM, so 'false' still counts as present).
        """
        if not shs_ui_hostname or not shs_ui_port or not shs_ssl_port or not shs_ssl_enabled:
            LOG.warning("Spark conf not found!")
            return None

        protocol = 'https' if shs_ssl_enabled.lower() == 'true' else 'http'
        shs_url = '%(protocol)s://%(hostname)s:%(port)s' % {
            'protocol':
            protocol,
            'hostname':
            shs_ui_hostname,
            'port':
            shs_ssl_port if shs_ssl_enabled.lower() == 'true' else shs_ui_port,
        }

        return shs_url

    def tools_echo(self):
        """Ping the CM API's echo endpoint (connectivity check)."""
        try:
            params = (('message', 'hello'), )

            LOG.info(params)
            return self._root.get('tools/echo', params=params)
        except RestException as e:
            raise ManagerApiException(e)

    def get_kafka_brokers(self, cluster_name=None):
        """Return a comma-separated 'host:9092' list of Kafka brokers."""
        try:

            hosts = self._get_hosts('KAFKA',
                                    'KAFKA_BROKER',
                                    cluster_name=cluster_name)

            # NOTE(review): broker port 9092 is hard-coded here.
            brokers_hosts = [host['hostname'] + ':9092' for host in hosts]

            return ','.join(brokers_hosts)
        except RestException as e:
            raise ManagerApiException(e)

    def get_kudu_master(self, cluster_name=None):
        """Return the hostname of the (first) Kudu master role."""
        try:
            cluster = self._get_cluster(cluster_name)
            services = self._root.get('clusters/%(name)s/services' %
                                      cluster)['items']

            service = [
                service for service in services if service['type'] == 'KUDU'
            ][0]
            master = self._get_roles(cluster['name'], service['name'],
                                     'KUDU_MASTER')[0]

            master_host = self._root.get('hosts/%(hostId)s' %
                                         master['hostRef'])

            return master_host['hostname']
        except RestException as e:
            raise ManagerApiException(e)

    def get_kafka_topics(self, broker_host):
        """List topics from the broker's HTTP endpoint on port 24042.

        NOTE(review): 24042 is presumably a Kafka monitoring/REST port —
        confirm against the deployment.
        """
        try:
            client = HttpClient('http://%s:24042' % broker_host, logger=LOG)
            root = Resource(client)

            return root.get('/api/topics')
        except RestException as e:
            raise ManagerApiException(e)

    def update_flume_config(self, cluster_name, config_name, config_value):
        """Set one config on the FLUME-1 agents' role config group via batch.

        The URL is built with .replace() instead of %-formatting because it
        contains literal '%20' escapes that would break the % operator.
        """
        service = 'FLUME-1'
        cluster = self._get_cluster(cluster_name)
        roleConfigGroup = [
            role['roleConfigGroupRef']['roleConfigGroupName']
            for role in self._get_roles(cluster['name'], service, 'AGENT')
        ]
        data = {
            u'items': [{
                u'url':
                u'/api/v8/clusters/%(cluster_name)s/services/%(service)s/roleConfigGroups/%(roleConfigGroups)s/config?message=Updated%20service%20and%20role%20type%20configurations.'
                .replace('%(cluster_name)s',
                         urllib_quote(cluster['name'])).replace(
                             '%(service)s',
                             service).replace('%(roleConfigGroups)s',
                                              roleConfigGroup[0]),
                u'body': {
                    u'items': [{
                        u'name': config_name,
                        u'value': config_value
                    }]
                },
                u'contentType':
                u'application/json',
                u'method':
                u'PUT'
            }]
        }

        return self.batch(items=data)

    def get_flume_agents(self, cluster_name=None):
        """Return the hostnames of all Flume AGENT roles."""
        return [
            host['hostname'] for host in self._get_hosts(
                'FLUME', 'AGENT', cluster_name=cluster_name)
        ]

    def _get_hosts(self, service_name, role_name, cluster_name=None):
        """Return the host records running ``role_name`` of ``service_name``."""
        try:
            cluster = self._get_cluster(cluster_name)
            services = self._root.get('clusters/%(name)s/services' %
                                      cluster)['items']

            service = [
                service for service in services
                if service['type'] == service_name
            ][0]
            hosts = self._get_roles(cluster['name'], service['name'],
                                    role_name)
            hosts_ids = [host['hostRef']['hostId'] for host in hosts]

            hosts = self._root.get('hosts')['items']
            return [host for host in hosts if host['hostId'] in hosts_ids]
        except RestException as e:
            raise ManagerApiException(e)

    def refresh_flume(self, cluster_name, restart=False):
        """Refresh (or restart, if ``restart``) the FLUME-1 agent roles."""
        service = 'FLUME-1'
        cluster = self._get_cluster(cluster_name)
        roles = [
            role['name']
            for role in self._get_roles(cluster['name'], service, 'AGENT')
        ]

        if restart:
            return self.restart_services(cluster['name'], service, roles)
        else:
            return self.refresh_configs(cluster['name'], service, roles)

    def refresh_configs(self, cluster_name, service=None, roles=None):
        """Issue a refresh command at cluster, service, or role scope."""
        try:
            if service is None:
                return self._root.post(
                    'clusters/%(cluster_name)s/commands/refresh' %
                    {'cluster_name': cluster_name},
                    contenttype="application/json")
            elif roles is None:
                return self._root.post(
                    'clusters/%(cluster_name)s/services/%(service)s/roleCommands/refresh'
                    % {
                        'cluster_name': cluster_name,
                        'service': service
                    },
                    contenttype="application/json")
            else:
                return self._root.post(
                    'clusters/%(cluster_name)s/services/%(service)s/roleCommands/refresh'
                    % {
                        'cluster_name': cluster_name,
                        'service': service
                    },
                    data=json.dumps({"items": roles}),
                    contenttype="application/json")
        except RestException as e:
            raise ManagerApiException(e)

    def restart_services(self, cluster_name, service=None, roles=None):
        """Issue a restart command at cluster, service, or role scope."""
        try:
            if service is None:
                return self._root.post(
                    'clusters/%(cluster_name)s/commands/restart' %
                    {'cluster_name': cluster_name},
                    contenttype="application/json")
            elif roles is None:
                return self._root.post(
                    'clusters/%(cluster_name)s/services/%(service)s/roleCommands/restart'
                    % {
                        'cluster_name': cluster_name,
                        'service': service
                    },
                    contenttype="application/json")
            else:
                return self._root.post(
                    'clusters/%(cluster_name)s/services/%(service)s/roleCommands/restart'
                    % {
                        'cluster_name': cluster_name,
                        'service': service
                    },
                    data=json.dumps({"items": roles}),
                    contenttype="application/json")
        except RestException as e:
            raise ManagerApiException(e)

    def batch(self, items):
        """POST a batch of API requests to the CM 'batch' endpoint."""
        try:
            return self._root.post('batch',
                                   data=json.dumps(items),
                                   contenttype='application/json')
        except RestException as e:
            raise ManagerApiException(e)

    def _get_cluster(self, cluster_name=None):
        """Return the named cluster record, or the first cluster if unnamed."""
        clusters = self._root.get('clusters/')['items']

        if cluster_name is not None:
            cluster = [
                cluster for cluster in clusters
                if cluster['name'] == cluster_name
            ][0]
        else:
            cluster = clusters[0]

        return cluster

    def _get_roles(self, cluster_name, service_name, role_type):
        """Return the service's role records filtered to ``role_type``."""
        roles = self._root.get(
            'clusters/%(cluster_name)s/services/%(service_name)s/roles' % {
                'cluster_name': cluster_name,
                'service_name': service_name
            })['items']
        return [role for role in roles if role['type'] == role_type]

    def get_impalad_config(self,
                           key=None,
                           impalad_host=None,
                           cluster_name=None):
        """Return the value of config ``key`` on the IMPALAD role running on
        ``impalad_host``, or None if not found (errors are only logged).

        Note: the format-dict keys reuse 'spark_service_display_name' /
        'shs_server_name' names copied from the SHS helper; only the URL
        placeholders matter, so behavior is unaffected.
        """
        if not key or not impalad_host:
            return None

        service_name = "IMPALA"
        role_type = 'IMPALAD'

        try:
            cluster = self._get_cluster(cluster_name)
            services = self._root.get('clusters/%(cluster_name)s/services' % {
                'cluster_name': cluster['name'],
                'service_name': service_name
            })['items']

            service_display_names = [
                service['displayName'] for service in services
                if service['type'] == service_name
            ]

            hosts = self._root.get('hosts')['items']
            impalad_hostIds = [
                host['hostId'] for host in hosts
                if host['hostname'] == impalad_host
            ]

            if impalad_hostIds and service_display_names:
                impalad_hostId = impalad_hostIds[0]
                impala_service_display_name = service_display_names[0]

                servers = self._root.get(
                    'clusters/%(cluster_name)s/services/%(spark_service_display_name)s/roles'
                    % {
                        'cluster_name': cluster['name'],
                        'spark_service_display_name':
                        impala_service_display_name
                    })['items']

                impalad_server_names = [
                    server['name'] for server in servers
                    if server['type'] == role_type
                    and server['hostRef']['hostId'] == impalad_hostId
                ]
                impalad_server_name = impalad_server_names[
                    0] if impalad_server_names else None

                if impalad_server_name:
                    server_configs = self._root.get(
                        'clusters/%(cluster_name)s/services/%(spark_service_display_name)s/roles/%(shs_server_name)s/config'
                        % {
                            'cluster_name': cluster['name'],
                            'spark_service_display_name':
                            impala_service_display_name,
                            'shs_server_name': impalad_server_name
                        },
                        params={'view': 'full'})['items']

                    for config in server_configs:
                        if 'relatedName' in config and 'value' in config:
                            if config['relatedName'] == key:
                                return config['value']

        except Exception as e:
            LOG.warning(
                "Get Impala Daemon API configurations via ManangerAPI: %s" % e)

        return None
class SparkHistoryServerApi(object):
  """REST client for the Spark History Server monitoring API
  ('<spark_hs_url>/api/<version>/applications/...'), plus helpers for
  fetching executor logs through the YARN log pages.
  """

  def __init__(self, spark_hs_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._ui_url = spark_hs_url
    self._url = posixpath.join(spark_hs_url, 'api/%s/' % _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "Spark History Server API at %s" % (self._url,)

  @property
  def url(self):
    """Base REST API URL."""
    return self._url

  @property
  def ui_url(self):
    """Base UI URL (without the '/api/<version>/' suffix)."""
    return self._ui_url

  @property
  def headers(self):
    return {'Accept': _JSON_CONTENT_TYPE}

  def applications(self):
    """List all applications known to the history server."""
    return self._root.get('applications', headers=self.headers)

  def application(self, app_id):
    """Fetch a single application."""
    return self._root.get('applications/%(app_id)s' % {'app_id': app_id}, headers=self.headers)

  def jobs(self, app_id):
    """List the jobs of an application."""
    return self._root.get('applications/%(app_id)s/jobs' % {'app_id': app_id}, headers=self.headers)

  def stages(self, app_id):
    """List the stages of an application."""
    return self._root.get('applications/%(app_id)s/stages' % {'app_id': app_id}, headers=self.headers)

  def executors(self, job):
    """List the executors of a SparkJob, or [] if its app id can't be resolved."""
    LOG.debug("Getting executors for Spark job %s" % job.jobId)
    app_id = self.get_real_app_id(job)
    if not app_id:
      return []

    return self._root.get('applications/%(app_id)s/executors' % {'app_id': app_id}, headers=self.headers)

  def stage_attempts(self, app_id, stage_id):
    """List the attempts of a stage."""
    return self._root.get('applications/%(app_id)s/stages/%(stage_id)s' % {'app_id': app_id, 'stage_id': stage_id}, headers=self.headers)

  def stage_attempt(self, app_id, stage_id, stage_attempt_id):
    """Fetch a single stage attempt."""
    return self._root.get('applications/%(app_id)s/stages/%(stage_id)s/%(stage_attempt_id)s' % {'app_id': app_id, 'stage_id': stage_id, 'stage_attempt_id': stage_attempt_id}, headers=self.headers)

  def task_summary(self, app_id, stage_id, stage_attempt_id):
    """Fetch the task-metric summary of a stage attempt."""
    return self._root.get('applications/%(app_id)s/stages/%(stage_id)s/%(stage_attempt_id)s/taskSummary' % {'app_id': app_id, 'stage_id': stage_id, 'stage_attempt_id': stage_attempt_id}, headers=self.headers)

  def task_list(self, app_id, stage_id, stage_attempt_id):
    """List the tasks of a stage attempt."""
    return self._root.get('applications/%(app_id)s/stages/%(stage_id)s/%(stage_attempt_id)s/taskList' % {'app_id': app_id, 'stage_id': stage_id, 'stage_attempt_id': stage_attempt_id}, headers=self.headers)

  def storages(self, app_id):
    """List the stored RDDs of an application."""
    return self._root.get('applications/%(app_id)s/storage/rdd' % {'app_id': app_id}, headers=self.headers)

  def storage(self, app_id, rdd_id):
    """Fetch the storage status of a single RDD."""
    return self._root.get('applications/%(app_id)s/storage/rdd/%(rdd_id)s' % {'app_id': app_id, 'rdd_id': rdd_id}, headers=self.headers)

  def download_logs(self, app_id):
    """Download the event logs of all attempts of an application."""
    return self._root.get('applications/%(app_id)s/logs' % {'app_id': app_id}, headers=self.headers)

  def download_attempt_logs(self, app_id, attempt_id):
    """Download the event logs of a single attempt."""
    return self._root.get('applications/%(app_id)s/%(attempt_id)s/logs' % {'app_id': app_id, 'attempt_id': attempt_id}, headers=self.headers)

  def download_executors_logs(self, request, job, name, offset):
    """Fetch the named log of the job's driver/first executor for the request user."""
    log_links = self.get_executors_loglinks(job)

    return self.retrieve_log_content(log_links, name, request.user.username, offset)

  def download_executor_logs(self, user, executor, name, offset):
    """Fetch the named log of a specific executor record."""
    return self.retrieve_log_content(executor['logs'], name, user.username, offset)

  def retrieve_log_content(self, log_links, log_name, username, offset):
    """Scrape the log text out of a YARN log web page.

    ``log_name`` defaults to 'stdout' unless it is exactly 'stderr'.
    The page is fetched as ``username`` (doAs) and the log body is taken
    from the second table cell of the rendered HTML.
    """
    params = {
      'doAs': username
    }

    if offset != 0:
      params['start'] = offset

    if not log_name or not log_name == 'stderr':
      log_name = 'stdout'

    log = ''
    if log_links and log_name in log_links:
      log_link = log_links[log_name]

      # Path component of the link is passed separately; no re-encoding.
      root = Resource(get_log_client(log_link), lib_urlsplit(log_link)[2], urlencode=False)
      response = root.get('', params=params)
      log = html.fromstring(response, parser=html.HTMLParser()).xpath('/html/body/table/tbody/tr/td[2]')[0].text_content()
    return log

  def get_executors_loglinks(self, job):
    """Return the log-links entry of the driver (or first) executor, or None.

    NOTE(review): index 12 is presumably the log-link field of the
    executor metric tuple produced elsewhere — confirm against the
    job-metrics producer.
    """
    executor = None
    if job.metrics and 'executors' in job.metrics and job.metrics['executors']:
      executors = [executor for executor in job.metrics['executors'] if executor[0] == 'driver']  # look up driver executor
      if not executors:
        executor = job.metrics['executors'][0]
      else:
        executor = executors[0]

    return None if not executor else executor[12]

  def get_real_app_id(self, job):
    """Resolve the '[app-id]' or '[app-id]/[attempt-id]' path for a job.

    Returns {} when the job is unknown to the history server, or None on
    error (logged).
    """
    # https://spark.apache.org/docs/1.6.0/monitoring.html and https://spark.apache.org/docs/2.0.0/monitoring.html
    # When running on Yarn, each application has multiple attempts, so [app-id] is actually [app-id]/[attempt-id] in all cases.
    # When running job as cluster mode, an attempt number is part of application ID, but proxy URL can't be resolved to match
    # Spark history URL. In the applications list, each job's attampt list shows if attempt ID is used and how many attempts.

    try:
      jobs_json = self.applications()
      job_filtered_json = [x for x in jobs_json if x['id'] == job.jobId]

      if not job_filtered_json:
        return {}

      attempts = job_filtered_json[0]['attempts']

      if len(attempts) == 1:
        app_id = job.jobId if 'attemptId' not in attempts[0] else job.jobId + '/' + attempts[0]['attemptId']
      else:
        app_id = job.jobId + '/%d' % len(attempts)

      LOG.debug("Getting real spark app id %s for Spark job %s" % (app_id, job.jobId))
    except Exception as e:
      LOG.error('Cannot get real app id %s: %s' % (job.jobId, e))
      app_id = None

    return app_id
Exemple #28
0
class JobServerApi(object):
  """REST client for the Livy job server: interactive sessions and batch jobs."""

  def __init__(self, livy_url):
    self._url = posixpath.join(livy_url)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = SECURITY_ENABLED.get()
    self._csrf_enabled = CSRF_ENABLED.get()
    # Per-thread storage for the effective username (see setuser()).
    self._thread_local = threading.local()

    if self.security_enabled:
      self._client.set_kerberos_auth()

    if self.csrf_enabled:
      # Livy rejects state-changing requests without this header when CSRF protection is on.
      self._client.set_headers({'X-Requested-By' : 'hue'})

    self._client.set_verify(SSL_CERT_CA_VERIFY.get())

  def __str__(self):
    return "JobServerApi at %s" % (self._url,)

  @staticmethod
  def _log_pagination_params(startFrom, size):
    """Build the 'from'/'size' pagination params shared by session and batch log calls.

    Omits each key when the corresponding argument is None.
    """
    params = {}

    if startFrom is not None:
      params['from'] = startFrom

    if size is not None:
      params['size'] = size

    return params

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  @property
  def csrf_enabled(self):
    return self._csrf_enabled

  @property
  def user(self):
    # Thread-local: raises AttributeError if setuser() was never called on this thread.
    return self._thread_local.user

  def setuser(self, user):
    """Remember the effective username (a User object or a plain string) for this thread."""
    if hasattr(user, 'username'):
      self._thread_local.user = user.username
    else:
      self._thread_local.user = user

  def get_status(self):
    """Fetch the sessions listing; doubles as a basic server status probe."""
    return self._root.get('sessions')

  def get_log(self, uuid, startFrom=None, size=None):
    """Return the session's log lines, joined with newlines."""
    params = self._log_pagination_params(startFrom, size)

    response = self._root.get('sessions/%s/log' % uuid, params=params)

    return '\n'.join(response['log'])

  def create_session(self, **properties):
    properties['proxyUser'] = self.user  # run the session as the requesting user
    return self._root.post('sessions', data=json.dumps(properties), contenttype=_JSON_CONTENT_TYPE)

  def get_sessions(self):
    return self._root.get('sessions')

  def get_session(self, uuid):
    return self._root.get('sessions/%s' % uuid)

  def get_statements(self, uuid):
    return self._root.get('sessions/%s/statements' % uuid)

  def submit_statement(self, uuid, statement):
    """Submit a code statement to a session and return the server's response."""
    data = {'code': statement}
    return self._root.post('sessions/%s/statements' % uuid, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE)

  def inspect(self, uuid, statement):
    data = {'code': statement}
    return self._root.post('sessions/%s/inspect' % uuid, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE)

  def fetch_data(self, session, statement):
    """Fetch the result of a previously submitted statement."""
    return self._root.get('sessions/%s/statements/%s' % (session, statement))

  def cancel(self, session):
    return self._root.post('sessions/%s/interrupt' % session)

  def close(self, uuid):
    return self._root.delete('sessions/%s' % uuid)

  def get_batches(self):
    return self._root.get('batches')

  def submit_batch(self, properties):
    properties['proxyUser'] = self.user  # run the batch as the requesting user
    return self._root.post('batches', data=json.dumps(properties), contenttype=_JSON_CONTENT_TYPE)

  def get_batch(self, uuid):
    return self._root.get('batches/%s' % uuid)

  def get_batch_status(self, uuid):
    response = self._root.get('batches/%s/state' % uuid)
    return response['state']

  def get_batch_log(self, uuid, startFrom=None, size=None):
    """Return the batch's log lines, joined with newlines."""
    params = self._log_pagination_params(startFrom, size)

    response = self._root.get('batches/%s/log' % uuid, params=params)

    return '\n'.join(response['log'])

  def close_batch(self, uuid):
    return self._root.delete('batches/%s' % uuid)
# Example #29
# 0
class ResourceManagerApi(object):
  """REST client for the YARN ResourceManager web services ('cluster/...' endpoints)."""

  def __init__(self, rm_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(rm_url, 'ws', _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    self._thread_local = threading.local() # To store user info
    self.from_failover = False

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def _get_params(self):
    """Common request params: impersonate via doAs when the caller is not the default user."""
    params = {}

    if self.username != DEFAULT_USER.get(): # We impersonate if needed
      params['doAs'] = self.username
      if not self.security_enabled:
        # Without Kerberos, identify ourselves explicitly so doAs is honored.
        params['user.name'] = DEFAULT_USER.get()

    return params

  def __str__(self):
    return "ResourceManagerApi at %s" % (self._url,)

  def setuser(self, user):
    """Set the thread-local user and return the previously effective one."""
    curr = self.user
    self._thread_local.user = user
    return curr

  @property
  def user(self):
    return self.username # Backward compatibility

  @property
  def username(self):
    # Falls back to the configured default when no user was set on this thread.
    try:
      return self._thread_local.user
    except AttributeError:
      return DEFAULT_USER.get()

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def cluster(self, **kwargs):
    params = self._get_params()
    return self._execute(self._root.get, 'cluster/info', params=params, headers={'Accept': _JSON_CONTENT_TYPE})

  def apps(self, **kwargs):
    params = self._get_params()
    params.update(kwargs)
    return self._execute(self._root.get, 'cluster/apps', params=params, headers={'Accept': _JSON_CONTENT_TYPE})

  def app(self, app_id):
    params = self._get_params()
    return self._execute(self._root.get, 'cluster/apps/%(app_id)s' % {'app_id': app_id}, params=params, headers={'Accept': _JSON_CONTENT_TYPE})

  def appattempts(self, app_id):
    params = self._get_params()
    return self._execute(self._root.get, 'cluster/apps/%(app_id)s/appattempts' % {'app_id': app_id}, params=params, headers={'Accept': _JSON_CONTENT_TYPE})

  def appattempts_attempt(self, app_id, attempt_id):
    """Return the attempt matching attempt_id, or raise PopupException when absent."""
    attempts = self.appattempts(app_id)
    for attempt in attempts['appAttempts']['appAttempt']:
      if attempt['id'] == attempt_id:
        return attempt
    raise PopupException('Application {} does not have application attempt with id {}'.format(app_id, attempt_id))

  def kill(self, app_id):
    """Move the application to KILLED state; cancels any delegation token afterwards."""
    data = {'state': 'KILLED'}
    token = None

    # Tokens are managed within the kill method but should be moved out when not alpha anymore or we support submitting an app.
    # NOTE: the `and False` deliberately disables the delegation-token flow for now.
    if self.security_enabled and False:
      full_token = self.delegation_token()
      if 'token' not in full_token:
        raise PopupException(_('YARN did not return any token field.'), detail=smart_str(full_token))
      data['X-Hadoop-Delegation-Token'] = token = full_token.pop('token')
      LOG.debug('Received delegation token %s' % full_token)

    try:
      params = self._get_params()
      return self._execute(self._root.put, 'cluster/apps/%(app_id)s/state' % {'app_id': app_id}, params=params, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE)
    finally:
      if token:
        self.cancel_token(token)

  def delegation_token(self):
    """Request a new RM delegation token renewable by the current user."""
    params = self._get_params()
    data = {'renewer': self.username}
    return self._execute(self._root.post, 'cluster/delegation-token', params=params, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE)

  def cancel_token(self, token):
    params = self._get_params()
    headers = {'Hadoop-YARN-RM-Delegation-Token': token}
    # Bug fix: the original format string had no %s placeholder, so the
    # `%` raised TypeError ("not all arguments converted") instead of logging.
    LOG.debug('Canceling delegation token of %s' % self.username)
    return self._execute(self._root.delete, 'cluster/delegation-token', params=params, headers=headers)

  def _execute(self, function, *args, **kwargs):
    """Invoke a Resource call, wrapping any failure in a PopupException."""
    response = None
    try:
      response = function(*args, **kwargs)
    except Exception as e:  # 'as' form: valid on Python 2.6+ and 3, matching usage elsewhere in this file
      raise PopupException(_('YARN RM returned a failed response: %s') % e)
    return response
# Example #30
# 0
class SolrApi(object):
    """
  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """

    # NOTE(review): these keyword defaults are evaluated once at import time, not per call.
    def __init__(
        self,
        solr_url,
        user,
        security_enabled=SECURITY_ENABLED.get() if search_enabled() else SECURITY_ENABLED.default,
        ssl_cert_ca_verify=SSL_CERT_CA_VERIFY.get(),
    ):
        self._url = solr_url
        self._user = user
        self._client = HttpClient(self._url, logger=LOG)
        self.security_enabled = security_enabled

        if self.security_enabled:
            self._client.set_kerberos_auth()

        self._client.set_verify(ssl_cert_ca_verify)

        self._root = resource.Resource(self._client)

        # The Kerberos handshake requires two requests in order to authenticate,
        # but if our first request is a PUT/POST, it might flat-out reject the
        # first request if the body is too large. So, connect here in order to get
        # a cookie so future PUT/POSTs will be pre-authenticated.
        if self.security_enabled:
            self._root.invoke("HEAD", "/")

    def query(self, collection, query):
        """Translate a dashboard query + collection config into Solr select params and return the parsed JSON response."""
        solr_query = {}

        solr_query["collection"] = collection["name"]

        if query.get("download"):
            solr_query["rows"] = 1000
            solr_query["start"] = 0
        else:
            solr_query["rows"] = int(collection["template"]["rows"] or 10)
            solr_query["start"] = int(query["start"])

        # Hard caps: at most 1000 rows per page, pagination offset at most 10000.
        solr_query["rows"] = min(solr_query["rows"], 1000)
        solr_query["start"] = min(solr_query["start"], 10000)

        params = self._get_params() + (
            ("q", self._get_q(query)),
            ("wt", "json"),
            ("rows", solr_query["rows"]),
            ("start", solr_query["start"]),
        )

        # Each dashboard facet becomes either classic facet.* params or a JSON facet entry.
        if any(collection["facets"]):
            params += (("facet", "true"), ("facet.mincount", 0), ("facet.limit", 10))
            json_facets = {}

            timeFilter = self._get_range_borders(collection, query)

            for facet in collection["facets"]:
                if facet["type"] == "query":
                    params += (("facet.query", "%s" % facet["field"]),)
                elif facet["type"] == "range" or facet["type"] == "range-up":
                    keys = {
                        "id": "%(id)s" % facet,
                        "field": facet["field"],
                        "key": "%(field)s-%(id)s" % facet,
                        "start": facet["properties"]["start"],
                        "end": facet["properties"]["end"],
                        "gap": facet["properties"]["gap"],
                        "mincount": int(facet["properties"]["mincount"]),
                    }

                    if (
                        timeFilter
                        and timeFilter["time_field"] == facet["field"]
                        and (
                            facet["id"] not in timeFilter["time_filter_overrides"]
                            or facet["widgetType"] != "histogram-widget"
                        )
                    ):
                        keys.update(self._get_time_filter_query(timeFilter, facet))

                    params += (
                        (
                            "facet.range",
                            "{!key=%(key)s ex=%(id)s f.%(field)s.facet.range.start=%(start)s f.%(field)s.facet.range.end=%(end)s f.%(field)s.facet.range.gap=%(gap)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s"
                            % keys,
                        ),
                    )
                elif facet["type"] == "field":
                    keys = {
                        "id": "%(id)s" % facet,
                        "field": facet["field"],
                        "key": "%(field)s-%(id)s" % facet,
                        "limit": int(facet["properties"].get("limit", 10))
                        + (1 if facet["widgetType"] == "facet-widget" else 0),
                        "mincount": int(facet["properties"]["mincount"]),
                    }

                    params += (
                        (
                            "facet.field",
                            "{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s"
                            % keys,
                        ),
                    )
                elif facet["type"] == "nested":
                    _f = {
                        "field": facet["field"],
                        "limit": int(facet["properties"].get("limit", 10))
                        + (1 if facet["widgetType"] == "text-facet-widget" else 0),
                        "mincount": int(facet["properties"]["mincount"]),
                        "sort": {"count": facet["properties"]["sort"]},
                    }
                    # NOTE(review): Python 2 debug print left in — dumps the facet
                    # definition to stdout on every query with a nested facet.
                    print facet

                    if facet["properties"]["domain"].get("blockParent") or facet["properties"]["domain"].get(
                        "blockChildren"
                    ):
                        _f["domain"] = {}
                        if facet["properties"]["domain"].get("blockParent"):
                            _f["domain"]["blockParent"] = " OR ".join(facet["properties"]["domain"]["blockParent"])
                        if facet["properties"]["domain"].get("blockChildren"):
                            _f["domain"]["blockChildren"] = " OR ".join(facet["properties"]["domain"]["blockChildren"])

                    if "start" in facet["properties"] and not facet["properties"].get("type") == "field":
                        _f.update(
                            {
                                "type": "range",
                                "start": facet["properties"]["start"],
                                "end": facet["properties"]["end"],
                                "gap": facet["properties"]["gap"],
                            }
                        )
                        if (
                            timeFilter
                            and timeFilter["time_field"] == facet["field"]
                            and (
                                facet["id"] not in timeFilter["time_filter_overrides"]
                                or facet["widgetType"] != "bucket-widget"
                            )
                        ):
                            _f.update(self._get_time_filter_query(timeFilter, facet))
                    else:
                        _f.update(
                            {
                                "type": "terms",
                                "field": facet["field"],
                                "excludeTags": facet["id"],
                                "offset": 0,
                                "numBuckets": True,
                                "allBuckets": True,
                                "prefix": "",
                            }
                        )
                        if facet["properties"]["canRange"] and not facet["properties"]["isDate"]:
                            del _f["mincount"]  # Numeric fields do not support

                    if facet["properties"]["facets"]:
                        self._n_facet_dimension(facet, _f, facet["properties"]["facets"], 1)
                        if facet["widgetType"] == "text-facet-widget":
                            _fname = _f["facet"].keys()[0]
                            _f["sort"] = {_fname: facet["properties"]["sort"]}
                            # domain = '-d2:NaN' # Solr 6.4

                    json_facets[facet["id"]] = _f
                elif facet["type"] == "function":
                    json_facets[facet["id"]] = self._get_aggregate_function(facet)
                    json_facets["processEmpty"] = True
                elif facet["type"] == "pivot":
                    if facet["properties"]["facets"] or facet["widgetType"] == "map-widget":
                        fields = facet["field"]
                        fields_limits = []
                        for f in facet["properties"]["facets"]:
                            fields_limits.append("f.%s.facet.limit=%s" % (f["field"], f["limit"]))
                            fields_limits.append("f.%s.facet.mincount=%s" % (f["field"], f["mincount"]))
                            fields += "," + f["field"]
                        keys = {
                            "id": "%(id)s" % facet,
                            "key": "%(field)s-%(id)s" % facet,
                            "field": facet["field"],
                            "fields": fields,
                            "limit": int(facet["properties"].get("limit", 10)),
                            "mincount": int(facet["properties"]["mincount"]),
                            "fields_limits": " ".join(fields_limits),
                        }
                        params += (
                            (
                                "facet.pivot",
                                "{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s %(fields_limits)s}%(fields)s"
                                % keys,
                            ),
                        )

            if json_facets:
                params += (("json.facet", json.dumps(json_facets)),)

        params += self._get_fq(collection, query)

        # Field list (fl): either the explicitly selected grid columns (+ map fields) or everything.
        if collection["template"]["fieldsSelected"] and collection["template"]["isGridLayout"]:
            fields = set(
                collection["template"]["fieldsSelected"] + [collection["idField"]] if collection["idField"] else []
            )
            # Add field if needed
            if collection["template"]["leafletmap"].get("latitudeField"):
                fields.add(collection["template"]["leafletmap"]["latitudeField"])
            if collection["template"]["leafletmap"].get("longitudeField"):
                fields.add(collection["template"]["leafletmap"]["longitudeField"])
            if collection["template"]["leafletmap"].get("labelField"):
                fields.add(collection["template"]["leafletmap"]["labelField"])
            fl = urllib.unquote(utf_quoter(",".join(list(fields))))
        else:
            fl = "*"

        nested_fields = self._get_nested_fields(collection)
        if nested_fields:
            fl += urllib.unquote(utf_quoter(',[child parentFilter="%s"]' % " OR ".join(nested_fields)))

        params += (("fl", fl),)

        # Always request highlighting on all fields.
        params += (("hl", "true"), ("hl.fl", "*"), ("hl.snippets", 5), ("hl.fragsize", 1000))

        if collection["template"]["fieldsSelected"]:
            fields = []
            for field in collection["template"]["fieldsSelected"]:
                attribute_field = filter(
                    lambda attribute: field == attribute["name"], collection["template"]["fieldsAttributes"]
                )
                if attribute_field:
                    if attribute_field[0]["sort"]["direction"]:
                        fields.append("%s %s" % (field, attribute_field[0]["sort"]["direction"]))
            if fields:
                params += (("sort", ",".join(fields)),)

        response = self._root.get("%(collection)s/select" % solr_query, params)
        return self._get_json(response)

    def _n_facet_dimension(self, widget, _f, facets, dim):
        """Recursively translate the nested facet list into Solr JSON facet dimensions, starting at dimension `dim`.

        Mutates _f in place, nesting a 'facet' entry per dimension.
        """
        facet = facets[0]
        f_name = "dim_%02d:%s" % (dim, facet["field"])

        if facet["aggregate"]["function"] == "count":
            if "facet" not in _f:
                _f["facet"] = {f_name: {}}
            else:
                _f["facet"][f_name] = {}
            _f = _f["facet"]

            _f[f_name] = {
                "type": "terms",
                "field": "%(field)s" % facet,
                "limit": int(facet.get("limit", 10)),
                "mincount": int(facet["mincount"]),
                "numBuckets": True,
                "allBuckets": True,
                "prefix": "",
            }
            if widget["widgetType"] == "tree2-widget" and facets[-1]["aggregate"]["function"] != "count":
                _f["subcount"] = self._get_aggregate_function(facets[-1])

            if len(facets) > 1:  # Get n+1 dimension
                if facets[1]["aggregate"]["function"] == "count":
                    self._n_facet_dimension(widget, _f[f_name], facets[1:], dim + 1)
                else:
                    self._n_facet_dimension(widget, _f[f_name], facets[1:], dim)
        else:
            # Aggregate (non-count) leaf: emit one agg entry per remaining non-count facet.
            agg_function = self._get_aggregate_function(facet)
            _f["facet"] = {"agg_%02d_00:%s" % (dim, agg_function): agg_function}
            for i, _f_agg in enumerate(facets[1:], 1):
                if _f_agg["aggregate"]["function"] != "count":
                    agg_function = self._get_aggregate_function(_f_agg)
                    _f["facet"]["agg_%02d_%02d:%s" % (dim, i, agg_function)] = agg_function
                else:
                    self._n_facet_dimension(widget, _f, facets[i:], dim + 1)  # Get n+1 dimension
                    break

    def suggest(self, collection, query):
        """Call the Solr suggest handler for a collection and return the parsed JSON response."""
        try:
            params = self._get_params() + (
                ("suggest", "true"),
                ("suggest.build", "true"),
                ("suggest.q", query["q"]),
                ("wt", "json"),
            )
            if query.get("dictionary"):
                params += (("suggest.dictionary", query["dictionary"]),)
            response = self._root.get("%s/suggest" % collection, params)
            return self._get_json(response)
        except RestException, e:
            raise PopupException(e, title=_("Error while accessing Solr"))
# Example #31
# 0
class OozieApi(object):
    """REST client for the Oozie web services API (workflows, coordinators, bundles)."""

    def __init__(self,
                 oozie_url,
                 user,
                 security_enabled=False,
                 api_version=API_VERSION,
                 ssl_cert_ca_verify=True):
        self._url = posixpath.join(oozie_url, api_version)
        self._client = HttpClient(self._url, logger=LOG)

        if security_enabled:
            self._client.set_kerberos_auth()

        self._client.set_verify(ssl_cert_ca_verify)

        self._root = Resource(self._client)
        self._security_enabled = security_enabled
        # To store username info
        if hasattr(user, 'username'):
            self.user = user.username
        else:
            self.user = user
        self.api_version = api_version

    def __str__(self):
        return "OozieApi at %s" % (self._url, )

    @property
    def url(self):
        return self._url

    @property
    def security_enabled(self):
        return self._security_enabled

    def _get_params(self):
        """Common query params: with Kerberos, only doAs impersonation is sent; otherwise user.name too."""
        if self.security_enabled:
            return {'doAs': self.user, 'timezone': TIME_ZONE.get()}
        return {
            'user.name': DEFAULT_USER,
            'doAs': self.user,
            'timezone': TIME_ZONE.get()
        }

    def _get_oozie_properties(self, properties=None):
        """Merge caller-supplied job properties over the mandatory user.name default."""
        defaults = {
            'user.name': self.user,
        }

        if properties is not None:
            defaults.update(properties)

        return defaults

    # Filter keys accepted by get_jobs()/get_coordinator() and get_job_log() respectively.
    VALID_JOB_FILTERS = ('name', 'user', 'group', 'status', 'startcreatedtime',
                         'text')
    VALID_LOG_FILTERS = set(('recent', 'limit', 'loglevel', 'text'))

    def get_jobs(self, jobtype, offset=None, cnt=None, filters=None):
        """
    Get a list of Oozie jobs.

    Note that offset is 1-based.
    kwargs is used for filtering and may be one of VALID_FILTERS: name, user, group, status
    """
        params = self._get_params()
        if offset is not None:
            params['offset'] = str(offset)
        if cnt is not None:
            params['len'] = str(cnt)
        if filters is None:
            filters = []
        params['jobtype'] = jobtype

        # Oozie expects the filters packed as a single 'key=value;key=value' string.
        filter_list = []
        for key, val in filters:
            if key not in OozieApi.VALID_JOB_FILTERS:
                raise ValueError(
                    '"%s" is not a valid filter for selecting jobs' % (key, ))
            filter_list.append('%s=%s' % (key, val))
        params['filter'] = ';'.join(filter_list)

        # Send the request
        resp = self._root.get('jobs', params)
        if jobtype == 'wf':
            wf_list = WorkflowList(self, resp, filters=filters)
        elif jobtype == 'coord':
            wf_list = CoordinatorList(self, resp, filters=filters)
        else:
            wf_list = BundleList(self, resp, filters=filters)
        return wf_list

    def get_workflows(self, offset=None, cnt=None, filters=None):
        return self.get_jobs('wf', offset, cnt, filters)

    def get_coordinators(self, offset=None, cnt=None, filters=None):
        return self.get_jobs('coord', offset, cnt, filters)

    def get_bundles(self, offset=None, cnt=None, filters=None):
        return self.get_jobs('bundle', offset, cnt, filters)

    # TODO: make get_job accept any jobid
    def get_job(self, jobid):
        """
    get_job(jobid) -> Workflow
    """
        params = self._get_params()
        resp = self._root.get('job/%s' % (jobid, ), params)
        wf = Workflow(self, resp)
        return wf

    def get_coordinator(self, jobid, offset=None, cnt=None, filters=None):
        """Fetch one coordinator, with optional action pagination and filters, newest actions first."""
        params = self._get_params()
        if offset is not None:
            params['offset'] = str(offset)
        if cnt is not None:
            params['len'] = str(cnt)
        if filters is None:
            filters = {}
        params.update({'order': 'desc'})

        filter_list = []
        for key, val in filters:
            if key not in OozieApi.VALID_JOB_FILTERS:
                raise ValueError(
                    '"%s" is not a valid filter for selecting jobs' % (key, ))
            filter_list.append('%s=%s' % (key, val))
        params['filter'] = ';'.join(filter_list)

        resp = self._root.get('job/%s' % (jobid, ), params)
        return Coordinator(self, resp)

    def get_bundle(self, jobid):
        params = self._get_params()
        resp = self._root.get('job/%s' % (jobid, ), params)
        return Bundle(self, resp)

    def get_job_definition(self, jobid):
        """
    get_job_definition(jobid) -> Definition (xml string)
    """
        params = self._get_params()
        params['show'] = 'definition'
        return self._root.get('job/%s' % (jobid, ), params)

    def get_job_log(self, jobid, logfilter=None):
        """
    get_job_log(jobid) -> Log (xml string)
    """
        params = self._get_params()
        params['show'] = 'log'

        filter_list = []
        if logfilter is None:
            logfilter = []
        for key, val in logfilter:
            if key not in OozieApi.VALID_LOG_FILTERS:
                raise ValueError('"%s" is not a valid filter for job logs' %
                                 (key, ))
            filter_list.append('%s=%s' % (key, val))
        params['logfilter'] = ';'.join(filter_list)
        return self._root.get('job/%s' % (jobid, ), params)

    def get_job_status(self, jobid):
        params = self._get_params()
        params['show'] = 'status'

        xml = self._root.get('job/%s' % (jobid, ), params)
        return xml

    def get_action(self, action_id):
        """Fetch one action; the wrapper class is inferred from the id ('C@' coordinator, 'B@' bundle, else workflow)."""
        if 'C@' in action_id:
            Klass = CoordinatorAction
        elif 'B@' in action_id:
            Klass = BundleAction
        else:
            Klass = WorkflowAction
        params = self._get_params()
        resp = self._root.get('job/%s' % (action_id, ), params)
        return Klass(resp)

    def job_control(self, jobid, action, properties=None, parameters=None):
        """
    job_control(jobid, action) -> None
    Raise RestException on error.
    """
        if action not in ('start', 'suspend', 'resume', 'kill', 'rerun',
                          'coord-rerun', 'bundle-rerun', 'change', 'ignore',
                          'update'):
            msg = 'Invalid oozie job action: %s' % (action, )
            LOG.error(msg)
            raise ValueError(msg)
        properties = self._get_oozie_properties(properties)
        params = self._get_params()
        params['action'] = action
        if parameters is not None:
            params.update(parameters)

        return self._root.put('job/%s' % jobid,
                              params,
                              data=config_gen(properties),
                              contenttype=_XML_CONTENT_TYPE)

    def submit_workflow(self, application_path, properties=None):
        """
    submit_workflow(application_path, properties=None) -> jobid

    Raise RestException on error.
    """
        defaults = {
            'oozie.wf.application.path': application_path,
            'user.name': self.user,
        }

        if properties is not None:
            defaults.update(properties)
        properties = defaults

        return self.submit_job(properties)

    # Is name actually submit_coord?
    def submit_job(self, properties=None):
        """
    submit_job(properties=None, id=None) -> jobid

    Raise RestException on error.
    """
        defaults = {
            'user.name': self.user,
        }

        if properties is not None:
            defaults.update(properties)

        properties = defaults

        params = self._get_params()
        resp = self._root.post('jobs',
                               params,
                               data=config_gen(properties),
                               contenttype=_XML_CONTENT_TYPE)
        return resp['id']

    def dryrun(self, properties=None):
        """Submit the job in dryrun mode: validated by Oozie but not executed."""
        defaults = {
            'user.name': self.user,
        }

        if properties is not None:
            defaults.update(properties)

        properties = defaults

        params = self._get_params()
        params['action'] = 'dryrun'
        return self._root.post('jobs',
                               params,
                               data=config_gen(properties),
                               contenttype=_XML_CONTENT_TYPE)

    def rerun(self, jobid, properties=None, params=None):
        # NOTE(review): when params is given, the else branch updates a throwaway
        # dict from _get_params() — the merged defaults are discarded. Confirm intent.
        properties = self._get_oozie_properties(properties)
        if params is None:
            params = self._get_params()
        else:
            self._get_params().update(params)

        params['action'] = 'rerun'

        return self._root.put('job/%s' % jobid,
                              params,
                              data=config_gen(properties),
                              contenttype=_XML_CONTENT_TYPE)

    def get_build_version(self):
        """
    get_build_version() -> Build version (dictionary)
    """
        params = self._get_params()
        resp = self._root.get('admin/build-version', params)
        return resp

    def get_instrumentation(self):
        params = self._get_params()
        resp = self._root.get('admin/instrumentation', params)
        return resp

    def get_metrics(self):
        params = self._get_params()
        resp = self._root.get('admin/metrics', params)
        return resp

    def get_configuration(self):
        """
    get_configuration() -> Oozie config (dictionary)
    """
        params = self._get_params()
        resp = self._root.get('admin/configuration', params)
        return resp

    def get_oozie_status(self):
        """
    get_oozie_status() -> Oozie status (dictionary)
    """
        params = self._get_params()
        resp = self._root.get('admin/status', params)
        return resp

    def get_oozie_slas(self, **kwargs):
        """
    filter=
      app_name=my-sla-app
      id=0000002-131206135002457-oozie-oozi-W
      nominal_start=2013-06-18T00:01Z
      nominal_end=2013-06-23T00:01Z
    """
        params = self._get_params()
        # NOTE: dict.iteritems() is Python 2 only.
        params['filter'] = ';'.join(
            ['%s=%s' % (key, val) for key, val in kwargs.iteritems()])
        resp = self._root.get('sla', params)
        return resp['slaSummaryList']
# Example #32
# 0
class OptimizerApi(object):
  """Client for the Optimizer service, driven partly through the `ccs navopt` CLI.

  NOTE(review): _exec() uses Python-2-only `except E, e` syntax; this class
  will not import under Python 3 without porting.
  """

  # Static upload descriptors: expected CSV column headers plus the JSON
  # "file_headers" metadata template (with %(...)s placeholders) that is sent
  # alongside each uploaded file. The templates are runtime payloads — do not
  # reformat them.
  UPLOAD = {
    'queries': {
      'headers': ['SQL_ID', 'ELAPSED_TIME', 'SQL_FULLTEXT'],
      'file_headers': """{
    "fileLocation": "%(query_file)s",
    "tenant": "%(tenant)s",
    "fileName": "%(query_file_name)s",
    "sourcePlatform": "%(source_platform)s",
    "colDelim": ",",
    "rowDelim": "\\n",
    "headerFields": [
        {
            "count": 0,
            "coltype": "SQL_ID",
            "use": true,
            "tag": "",
            "name": "SQL_ID"
        },
        {
            "count": 0,
            "coltype": "NONE",
            "use": true,
            "tag": "",
            "name": "ELAPSED_TIME"
        },
        {
            "count": 0,
            "coltype": "SQL_QUERY",
            "use": true,
            "tag": "",
            "name": "SQL_FULLTEXT"
        }
    ]
}"""
    },
    'table_stats': {
        'headers': ['TABLE_NAME', 'NUM_ROWS'],
        'file_headers': """{
    "fileLocation": "%(query_file)s",
    "tenant": "%(tenant)s",
    "fileName": "%(query_file_name)s",
    "sourcePlatform": "%(source_platform)s",
    "colDelim": ",",
    "rowDelim": "\\n",
    "headerFields": [
        {
            "count": 0,
            "coltype": "NONE",
            "use": true,
            "tag": "",
            "name": "TABLE_NAME"
        },
        {
            "count": 0,
            "coltype": "NONE",
            "use": true,
            "tag": "",
            "name": "NUM_ROWS"
        }
    ]
}"""
    },
    'cols_stats': {
        'headers': ['table_name', 'column_name', 'data_type', 'num_distinct', 'num_nulls', 'avg_col_len'], # Lower case for some reason
        'file_headers': """{
    "fileLocation": "%(query_file)s",
    "tenant": "%(tenant)s",
    "fileName": "%(query_file_name)s",
    "sourcePlatform": "%(source_platform)s",
    "colDelim": ",",
    "rowDelim": "\\n",
    "headerFields": [
        {
            "count": 0,
            "coltype": "NONE",
            "use": true,
            "tag": "",
            "name": "table_name"
        },
        {
            "count": 0,
            "coltype": "NONE",
            "use": true,
            "tag": "",
            "name": "column_name"
        },
        {
            "count": 0,
            "coltype": "NONE",
            "use": true,
            "tag": "",
            "name": "data_type"
        },
        {
            "count": 0,
            "coltype": "NONE",
            "use": true,
            "tag": "",
            "name": "num_distinct"
        },
        {
            "count": 0,
            "coltype": "NONE",
            "use": true,
            "tag": "",
            "name": "num_nulls"
        },
        {
            "count": 0,
            "coltype": "NONE",
            "use": true,
            "tag": "",
            "name": "avg_col_len"
        }
    ]
}"""
    }
  }

  def __init__(self, api_url=None, product_name=None, product_secret=None, ssl_cert_ca_verify=OPTIMIZER.SSL_CERT_CA_VERIFY.get(), product_auth_secret=None):
    """Build an HTTP client for the Optimizer endpoint.

    Explicit arguments win over the OPTIMIZER configuration values.
    NOTE(review): when product_name is unset, get_tenant() is called before
    self._client exists — assumes get_tenant() does not use this instance's
    client; confirm against its implementation.
    """
    self._api_url = (api_url or get_optimizer_url()).strip('/')
    self._email = OPTIMIZER.EMAIL.get()
    self._email_password = OPTIMIZER.EMAIL_PASSWORD.get()
    self._product_secret = product_secret if product_secret else OPTIMIZER.PRODUCT_SECRET.get()
    self._product_auth_secret = product_auth_secret if product_auth_secret else OPTIMIZER.PRODUCT_AUTH_SECRET.get()
    self._product_name = product_name if product_name else (OPTIMIZER.PRODUCT_NAME.get() or self.get_tenant()['tenant']) # Aka "workload"

    self._client = HttpClient(self._api_url, logger=LOG)
    self._client.set_verify(ssl_cert_ca_verify)

    self._root = resource.Resource(self._client)
    self._token = None  # lazily filled by _authenticate()


  def _authenticate(self, force=False):
    """Return a cached auth token, fetching a new one when missing or forced."""
    if self._token is None or force:
      self._token = self.authenticate()['token']

    return self._token


  def _exec(self, command, args):
    """Run a `ccs navopt` subcommand and return its raw stdout.

    An 'upload' exiting with code 1 is treated as success (the CLI is known
    to return 1 despite uploading); other failures are wrapped in
    OptimizerApiException.
    """
    data = None
    response = {'status': 'error'}  # NOTE(review): assigned but unused in the visible code

    try:
      cmd_args = [
          'ccs',
          'navopt',
          '--endpoint-url=%s' % self._api_url,
          command
      ]
      if self._product_secret:
        cmd_args += ['--auth-config', self._product_secret]

      LOG.info(' '.join(cmd_args + args))
      data = subprocess.check_output(cmd_args + args)
    except CalledProcessError, e:  # Python 2-only except syntax
      if command == 'upload' and e.returncode == 1:
        LOG.info('Upload command is successful despite return code of 1: %s' % e.output)
        data = '\n'.join(e.output.split('\n')[3:]) # Beware removing of {"url":...}
      else:
        raise OptimizerApiException(e, title=_('Error while accessing Optimizer'))
    except RestException, e:  # Python 2-only except syntax
      raise OptimizerApiException(e, title=_('Error while accessing Optimizer'))
Exemple #33
0
class SolrApi(object):
  """
  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """
  def __init__(self, solr_url=None, user=None, security_enabled=False, ssl_cert_ca_verify=SSL_CERT_CA_VERIFY.get()):
    """Build the Solr HTTP client.

    NOTE(review): when both solr_url and SOLR_URL resolve to a falsy value,
    none of the instance attributes are set — later method calls would fail
    with AttributeError; confirm callers always provide a URL.
    """
    if solr_url is None and hasattr(SOLR_URL, 'get'):
      solr_url = SOLR_URL.get()

    if solr_url:
      self._url = solr_url
      self._user = user
      self._client = HttpClient(self._url, logger=LOG)
      self.security_enabled = security_enabled or SECURITY_ENABLED.get()

      if self.security_enabled:
        self._client.set_kerberos_auth()

      self._client.set_verify(ssl_cert_ca_verify)

      self._root = resource.Resource(self._client)

      # The Kerberos handshake requires two requests in order to authenticate,
      # but if our first request is a PUT/POST, it might flat-out reject the
      # first request if the body is too large. So, connect here in order to get
      # a cookie so future PUT/POSTs will be pre-authenticated.
      if self.security_enabled:
        self._root.invoke('HEAD', '/')


  def query(self, collection, query):
    """Build and execute the dashboard /select request for `collection`.

    Assembles q/fq, classic facet params, JSON facets, MoreLikeThis,
    highlighting and sorting from the dashboard `collection` definition and
    the client `query` dict, then POSTs (when JSON facets exist) or GETs.
    Returns the parsed JSON response.

    NOTE(review): `collection` and `query` are dashboard dicts whose exact
    schema is defined elsewhere (Collection2) — field meanings below are
    taken from how they are read here.
    """
    solr_query = {}
    json_facets = {}

    solr_query['collection'] = collection['name']

    if query.get('download'):
      solr_query['rows'] = 1000
      solr_query['start'] = 0
    else:
      solr_query['rows'] = int(collection['template']['rows'] or 10)
      solr_query['start'] = int(query['start'])

    # Hard caps to protect the server from oversized pages / deep paging.
    solr_query['rows'] = min(solr_query['rows'], 1000)
    solr_query['start'] = min(solr_query['start'], 10000)

    params = self._get_params() + (
        ('q', self._get_q(query)),
        ('wt', 'json'),
        ('rows', solr_query['rows']),
        ('start', solr_query['start']),
    )

    # Classic facet params for query/range/field/pivot widgets; JSON facet
    # API payloads for nested/function widgets.
    if any(collection['facets']):
      params += (
        ('facet', 'true'),
        ('facet.mincount', 0),
        ('facet.limit', 10),
      )

      timeFilter = self._get_range_borders(collection, query)

      for facet in collection['facets']:
        if facet['type'] == 'query':
          params += (('facet.query', '%s' % facet['field']),)
        elif facet['type'] == 'range' or facet['type'] == 'range-up':
          keys = {
              'id': '%(id)s' % facet,
              'field': facet['field'],
              'key': '%(field)s-%(id)s' % facet,
              'start': facet['properties']['start'],
              'end': facet['properties']['end'],
              'gap': facet['properties']['gap'],
              'mincount': int(facet['properties']['mincount'])
          }

          # Override start/end/gap from the global time filter when this facet
          # tracks the collection's time field and is not explicitly excluded.
          if facet['properties']['canRange'] or timeFilter and timeFilter['time_field'] == facet['field'] and (facet['id'] not in timeFilter['time_filter_overrides'] or facet['widgetType'] != 'histogram-widget'):
            keys.update(self._get_time_filter_query(timeFilter, facet, collection))

          params += (
             ('facet.range', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.range.start=%(start)s f.%(field)s.facet.range.end=%(end)s f.%(field)s.facet.range.gap=%(gap)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys),
          )
        elif facet['type'] == 'field':
          keys = {
              'id': '%(id)s' % facet,
              'field': facet['field'],
              'key': '%(field)s-%(id)s' % facet,
              'limit': int(facet['properties'].get('limit', 10)) + (1 if facet['widgetType'] == 'facet-widget' else 0),
              'mincount': int(facet['properties']['mincount'])
          }

          params += (
              ('facet.field', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys),
          )
        elif facet['type'] == 'nested':
          # Multi-dimension facet: recursively built into the JSON facet API
          # payload by _n_facet_dimension().
          _f = {}
          if facet['properties']['facets']:
            self._n_facet_dimension(facet, _f, facet['properties']['facets'], 1, timeFilter, collection, can_range = facet['properties']['canRange'])

          if facet['properties'].get('domain'):
            if facet['properties']['domain'].get('blockParent') or facet['properties']['domain'].get('blockChildren'):
              _f['domain'] = {}
              if facet['properties']['domain'].get('blockParent'):
                _f['domain']['blockParent'] = ' OR '.join(facet['properties']['domain']['blockParent'])
              if facet['properties']['domain'].get('blockChildren'):
                _f['domain']['blockChildren'] = ' OR '.join(facet['properties']['domain']['blockChildren'])

          if _f:
            sort = {'count': facet['properties']['facets'][0]['sort']}
            for i, agg in enumerate(self._get_dimension_aggregates(facet['properties']['facets'][1:])):
              if agg['sort'] != 'default':
                agg_function = self._get_aggregate_function(agg)
                sort = {'agg_%02d_%02d:%s' % (1, i, agg_function): agg['sort']}

            if sort.get('count') == 'default':
              sort['count'] = 'desc'

            dim_key = [key for key in list(_f['facet'].keys()) if 'dim' in key][0]
            _f['facet'][dim_key].update({
                  'excludeTags': facet['id'],
                  'offset': 0,
                  'numBuckets': True,
                  'allBuckets': True,
                  'sort': sort
                  #'prefix': '' # Forbidden on numeric fields
              })
            json_facets[facet['id']] = _f['facet'][dim_key]
        elif facet['type'] == 'function':
          if facet['properties']['facets']:
            json_facets[facet['id']] = self._get_aggregate_function(facet['properties']['facets'][0])
            if facet['properties']['compare']['is_enabled']:
              # TODO: global compare override
              unit = re.split('\d+', facet['properties']['compare']['gap'])[1]
              json_facets[facet['id']] = {
                'type': 'range',
                'field': collection['timeFilter'].get('field'),
                'start': 'NOW/%s-%s-%s' % (unit, facet['properties']['compare']['gap'], facet['properties']['compare']['gap']),
                'end': 'NOW/%s' % unit,
                'gap': '+%(gap)s' % facet['properties']['compare'],
                'facet': {facet['id']: json_facets[facet['id']]}
              }
            if facet['properties']['filter']['is_enabled']:
              json_facets[facet['id']] = {
                'type': 'query',
                'q': facet['properties']['filter']['query'] or EMPTY_QUERY.get(),
                'facet': {facet['id']: json_facets[facet['id']]}
              }
            json_facets['processEmpty'] = True
        elif facet['type'] == 'pivot':
          if facet['properties']['facets'] or facet['widgetType'] == 'map-widget':
            fields = facet['field']
            fields_limits = []
            for f in facet['properties']['facets']:
              fields_limits.append('f.%s.facet.limit=%s' % (f['field'], f['limit']))
              fields_limits.append('f.%s.facet.mincount=%s' % (f['field'], f['mincount']))
              fields += ',' + f['field']
            keys = {
                'id': '%(id)s' % facet,
                'key': '%(field)s-%(id)s' % facet,
                'field': facet['field'],
                'fields': fields,
                'limit': int(facet['properties'].get('limit', 10)),
                'mincount': int(facet['properties']['mincount']),
                'fields_limits': ' '.join(fields_limits)
            }
            params += (
                ('facet.pivot', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s %(fields_limits)s}%(fields)s' % keys),
            )

    params += self._get_fq(collection, query)

    fl = urllib_unquote(utf_quoter(','.join(Collection2.get_field_list(collection))))

    nested_fields = self._get_nested_fields(collection)
    if nested_fields:
      fl += urllib_unquote(utf_quoter(',[child parentFilter="%s"]' % ' OR '.join(nested_fields)))

    # MoreLikeThis: widen fl to '*' and feed mlt.fl the original field list.
    if collection['template']['moreLikeThis'] and fl != ['*']: # Potential conflict with nested documents
      id_field = collection.get('idField', 'id')
      params += (
        ('mlt', 'true'),
        ('mlt.fl', fl.replace(',%s' % id_field, '')),
        ('mlt.mintf', 1),
        ('mlt.mindf', 1),
        ('mlt.maxdf', 50),
        ('mlt.maxntp', 1000),
        ('mlt.count', 10),
        #('mlt.minwl', 1),
        #('mlt.maxwl', 1),
      )
      fl = '*'

    params += (('fl', fl),)

    params += (
      ('hl', 'true'),
      ('hl.fl', '*'),
      ('hl.snippets', 5),
      ('hl.fragsize', 1000),
    )

    #if query.get('timezone'):
    #  params += (('TZ', query.get('timezone')),)

    if collection['template']['fieldsSelected']:
      fields = []
      for field in collection['template']['fieldsSelected']:
        attribute_field = [attribute for attribute in collection['template']['fieldsAttributes'] if field == attribute['name']]
        if attribute_field:
          if attribute_field[0]['sort']['direction']:
            fields.append('%s %s' % (field, attribute_field[0]['sort']['direction']))
      if fields:
        params += (
          ('sort', ','.join(fields)),
        )

    # JSON facets must be sent in a POST body; plain requests can GET.
    if json_facets:
      response = self._root.post(
          '%(collection)s/select' % solr_query,
          params,
          data=json.dumps({'facet': json_facets}),
          contenttype='application/json')
    else:
      response = self._root.get('%(collection)s/select' % solr_query, params)

    return self._get_json(response)


  def _n_facet_dimension(self, widget, _f, facets, dim, timeFilter, collection, can_range=None):
    """Recursively build the JSON facet API payload for one facet dimension.

    Mutates `_f` in place: 'count' dimensions become a terms (or range) facet
    named 'dim_%02d:<field>' and recurse into the remaining `facets`;
    non-count dimensions become 'agg_%02d_%02d:<fn>' aggregate entries.
    `dim` is the 1-based dimension index used in the generated keys.
    """
    facet = facets[0]
    f_name = 'dim_%02d:%s' % (dim, facet['field'])

    if facet['aggregate']['function'] == 'count':
      if 'facet' not in _f:
        _f['facet'] = {f_name: {}}
      else:
        _f['facet'][f_name] = {}
      _f = _f['facet']

      # Sort on the last explicitly-sorted aggregate, else by count.
      sort = {'count': facet['sort']}
      for i, agg in enumerate(self._get_dimension_aggregates(facets)):
        if agg['sort'] != 'default':
          agg_function = self._get_aggregate_function(agg)
          sort = {'agg_%02d_%02d:%s' % (dim, i, agg_function): agg['sort']}
      if sort.get('count') == 'default':
        sort['count'] = 'desc'

      _f[f_name] = {
          'type': 'terms',
          'field': '%(field)s' % facet,
          'limit': int(facet.get('limit', 10)),
          'numBuckets': True,
          'allBuckets': True,
          'sort': sort,
          'missing': facet.get('missing', False)
          #'prefix': '' # Forbidden on numeric fields
      }
      if int(facet['mincount']):
        _f[f_name]['mincount'] = int(facet['mincount']) # Forbidden on n > 0 field if mincount = 0

      # A 'start' property switches the dimension from terms to range facet.
      if 'start' in facet and not facet.get('type') == 'field':
        _f[f_name].update({
            'type': 'range',
            'start': facet['start'],
            'end': facet['end'],
            'gap': facet['gap']
        })

        # Only on dim 1 currently
        if can_range or (timeFilter and timeFilter['time_field'] == facet['field'] and (widget['id'] not in timeFilter['time_filter_overrides'])): # or facet['widgetType'] != 'bucket-widget'):
          facet['widgetType'] = widget['widgetType']
          _f[f_name].update(self._get_time_filter_query(timeFilter, facet, collection))

      if widget['widgetType'] == 'tree2-widget' and facets[-1]['aggregate']['function'] != 'count':
        _f['subcount'] = self._get_aggregate_function(facets[-1])

      if len(facets) > 1: # Get n+1 dimension
        if facets[1]['aggregate']['function'] == 'count':
          self._n_facet_dimension(widget, _f[f_name], facets[1:], dim + 1, timeFilter, collection)
        else:
          self._n_facet_dimension(widget, _f[f_name], facets[1:], dim, timeFilter, collection)
    else:
      # Leading non-count aggregate: emit agg entries until the next count
      # dimension, then recurse one level deeper from there.
      agg_function = self._get_aggregate_function(facet)
      _f['facet'] = {
          'agg_%02d_00:%s' % (dim, agg_function): agg_function
      }
      for i, _f_agg in enumerate(facets[1:], 1):
        if _f_agg['aggregate']['function'] != 'count':
          agg_function = self._get_aggregate_function(_f_agg)
          _f['facet']['agg_%02d_%02d:%s' % (dim, i, agg_function)] = agg_function
        else:
          self._n_facet_dimension(widget, _f, facets[i:], dim + 1, timeFilter, collection) # Get n+1 dimension
          break


  def select(self, collection, query=None, rows=100, start=0):
    """Run a plain /select query against `collection`; returns parsed JSON."""
    q = EMPTY_QUERY.get() if query is None else query

    select_params = self._get_params() + (
        ('q', q),
        ('wt', 'json'),
        ('rows', rows),
        ('start', start),
    )
    return self._get_json(self._root.get('%s/select' % collection, select_params))


  def suggest(self, collection, query):
    """Query the suggester of `collection`; `query` may carry a 'dictionary'."""
    try:
      suggest_params = self._get_params() + (
          ('suggest', 'true'),
          ('suggest.build', 'true'),
          ('suggest.q', query['q']),
          ('wt', 'json'),
      )
      dictionary = query.get('dictionary')
      if dictionary:
        suggest_params += (('suggest.dictionary', dictionary),)
      return self._get_json(self._root.get('%s/suggest' % collection, suggest_params))
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))


  def collections(self): # To drop, used in indexer v1
    """Read clusterstate.json from ZooKeeper and return it as a dict (legacy)."""
    try:
      zk_params = self._get_params() + (
          ('detail', 'true'),
          ('path', '/clusterstate.json'),
      )
      payload = self._root.get('zookeeper', params=zk_params)
      return json.loads(payload['znode'].get('data', '{}'))
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))


  def collections2(self):
    """List collections via the Collections API LIST action."""
    try:
      query = self._get_params() + (('action', 'LIST'), ('wt', 'json'))
      return self._root.get('admin/collections', params=query)['collections']
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))


  def config(self, name):
    """Return the 'config' section of collection `name`'s Config API response."""
    try:
      response = self._root.get('%s/config' % name, params=self._get_params() + (('wt', 'json'),))
      return self._get_json(response)['config']
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))


  def configs(self):
    """List the available configSets."""
    try:
      query = self._get_params() + (('action', 'LIST'), ('wt', 'json'))
      return self._root.get('admin/configs', params=query)['configSets']
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))


  def create_config(self, name, base_config, immutable=False):
    """Create configSet `name` derived from `base_config`."""
    try:
      query = self._get_params() + (
          ('action', 'CREATE'),
          ('name', name),
          ('baseConfigSet', base_config),
          ('configSetProp.immutable', immutable),
          ('wt', 'json'),
      )
      return self._root.post('admin/configs', params=query, contenttype='application/json')
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))


  def delete_config(self, name):
    """Delete configSet `name`; returns {'status': 0} on success, else an error message."""
    result = {'status': -1, 'message': ''}

    try:
      query = self._get_params() + (
        ('action', 'DELETE'),
        ('name', name),
        ('wt', 'json')
      )
      data = self._root.get('admin/configs', params=query)
      if data['responseHeader']['status'] == 0:
        result['status'] = 0
      else:
        result['message'] = "Could not remove config: %s" % data
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))
    return result


  def list_aliases(self):
    """Return collection aliases, or an empty list when none are defined."""
    try:
      query = self._get_params() + (('action', 'LISTALIASES'), ('wt', 'json'))
      return self._root.get('admin/collections', params=query)['aliases'] or []
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))


  def collection_or_core(self, hue_collection):
    """Dispatch to core() or collection() depending on the Hue collection type."""
    if hue_collection.is_core_only:
      return self.core(hue_collection.name)
    return self.collection(hue_collection.name)


  def collection(self, name):
    """Look up one collection entry in the legacy clusterstate dump."""
    try:
      return self.collections()[name]
    except Exception as e:
      raise PopupException(e, title=_('Error while accessing Solr'))


  def create_collection2(self, name, config_name=None, shards=1, replication=1, **kwargs):
    """Create a collection via the Collections API; raises on reported failure."""
    try:
      query = self._get_params() + (
        ('action', 'CREATE'),
        ('name', name),
        ('numShards', shards),
        ('replicationFactor', replication),
        ('wt', 'json')
      )
      if config_name:
        query += (('collection.configName', config_name),)
      if kwargs:
        query += tuple(((key, val) for key, val in kwargs.items()))

      raw = self._root.post('admin/collections', params=query, contenttype='application/json')
      response_data = self._get_json(raw)
      if response_data.get('failure'):
        raise PopupException(_('Collection could not be created: %(failure)s') % response_data)
      return response_data
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))


  def update_config(self, name, properties):
    """POST a JSON config overlay (`properties`) to collection `name`."""
    try:
      query = self._get_params() + (('wt', 'json'),)
      response = self._root.post('%(collection)s/config' % {'collection': name}, params=query, data=json.dumps(properties), contenttype='application/json')
      return self._get_json(response)
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))


  def add_fields(self, name, fields):
    """Add `fields` to collection `name`'s schema via the Schema API."""
    try:
      query = self._get_params() + (('wt', 'json'),)
      payload = json.dumps({'add-field': fields})
      response = self._root.post('%(collection)s/schema' % {'collection': name}, params=query, data=payload, contenttype='application/json')
      return self._get_json(response)
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))


  def create_core(self, name, instance_dir, shards=1, replication=1):
    """Create a standalone core; True on success, False otherwise.

    `shards`/`replication` are accepted for signature symmetry but unused.
    """
    try:
      query = self._get_params() + (
        ('action', 'CREATE'),
        ('name', name),
        ('instanceDir', instance_dir),
        ('wt', 'json'),
      )
      response = self._root.post('admin/cores', params=query, contenttype='application/json')
      if response.get('responseHeader', {}).get('status', -1) == 0:
        return True
      LOG.error("Could not create core. Check response:\n%s" % json.dumps(response, indent=2))
      return False
    except RestException as e:
      if 'already exists' in e.message:
        LOG.warn("Could not create collection.", exc_info=True)
        return False
      raise PopupException(e, title=_('Error while accessing Solr'))


  def create_alias(self, name, collections):
    """Create or update alias `name` to point at `collections`."""
    try:
      query = self._get_params() + (
        ('action', 'CREATEALIAS'),
        ('name', name),
        ('collections', ','.join(collections)),
        ('wt', 'json'),
      )
      response = self._root.post('admin/collections', params=query, contenttype='application/json')
      if response.get('responseHeader', {}).get('status', -1) != 0:
        raise PopupException(_("Could not create or edit alias: %s") % response)
      return response
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))


  def delete_alias(self, name):
    """Delete alias `name`; raises PopupException when Solr reports failure."""
    try:
      query = self._get_params() + (
        ('action', 'DELETEALIAS'),
        ('name', name),
        ('wt', 'json'),
      )
      response = self._root.post('admin/collections', params=query, contenttype='application/json')
      if response.get('responseHeader', {}).get('status', -1) != 0:
        msg = _("Could not delete alias. Check response:\n%s") % json.dumps(response, indent=2)
        LOG.error(msg)
        raise PopupException(msg)
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))


  def delete_collection(self, name):
    """Delete collection `name`; returns {'status': 0} or an error message."""
    result = {'status': -1, 'message': ''}

    try:
      query = self._get_params() + (
        ('action', 'DELETE'),
        ('name', name),
        ('wt', 'json')
      )
      data = self._root.post('admin/collections', params=query, contenttype='application/json')
      if data['responseHeader']['status'] == 0:
        result['status'] = 0
      else:
        result['message'] = "Could not remove collection: %s" % data
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))
    return result


  def remove_core(self, name):
    """Unload core `name` and delete its index; True on success."""
    try:
      query = self._get_params() + (
        ('action', 'UNLOAD'),
        ('name', name),
        ('deleteIndex', 'true'),
        ('wt', 'json')
      )
      response = self._root.post('admin/cores', params=query, contenttype='application/json')
      if 'success' in response:
        return True
      LOG.error("Could not remove core. Check response:\n%s" % json.dumps(response, indent=2))
      return False
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))


  def cores(self):
    """Return the 'status' map of all cores."""
    try:
      return self._root.get('admin/cores', params=self._get_params() + (('wt', 'json'),))['status']
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))

  def core(self, core):
    """Return the admin status of a single core."""
    try:
      query = self._get_params() + (('wt', 'json'), ('core', core))
      return self._root.get('admin/cores', params=query)
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))


  def get_schema(self, collection):
    """Return the parsed schema of `collection` via the Schema API."""
    try:
      response = self._root.get('%(core)s/schema' % {'core': collection}, params=self._get_params() + (('wt', 'json'),))
      return self._get_json(response)['schema']
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))

  # Deprecated
  def schema(self, core):
    """Fetch raw schema.xml via the admin file handler (deprecated)."""
    try:
      query = self._get_params() + (
          ('wt', 'json'),
          ('file', 'schema.xml'),
      )
      return self._root.get('%(core)s/admin/file' % {'core': core}, params=query)
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))

  def fields(self, core, dynamic=False):
    """Introspect fields through the Luke handler; schema-only unless `dynamic`."""
    try:
      query = self._get_params() + (
          ('wt', 'json'),
          ('fl', '*'),
      )
      if not dynamic:
        query += (('show', 'schema'),)
      return self._get_json(self._root.get('%(core)s/admin/luke' % {'core': core}, params=query))
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))

  def luke(self, core):
    """Return the full Luke handler dump for `core`."""
    try:
      response = self._root.get('%(core)s/admin/luke' % {'core': core}, params=self._get_params() + (('wt', 'json'),))
      return self._get_json(response)
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))

  def schema_fields(self, core):
    """Return schema fields, tagging the uniqueKey field with primary_key='true'."""
    try:
      raw = self._root.get('%(core)s/schema' % {'core': core}, params=self._get_params() + (('wt', 'json'),))
      parsed = self._get_json(raw)
      fields = parsed['schema']['fields']
      unique_key = parsed['schema'].get('uniqueKey')
      if unique_key:
        for field in fields:
          if field['name'] == unique_key:
            field['primary_key'] = 'true'
      return {
        'fields': fields,
        'responseHeader': parsed['responseHeader']
      }
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))

  def stats(self, core, fields, query=None, facet=''):
    """Run a zero-row stats query over `fields`, optionally faceted."""
    try:
      q = self._get_q(query) if query is not None else EMPTY_QUERY.get()
      stats_params = self._get_params() + (
          ('q', q),
          ('wt', 'json'),
          ('rows', 0),
          ('stats', 'true'),
      )

      if query is not None:
        stats_params += self._get_fq(None, query)

      if facet:
        stats_params += (('stats.facet', facet),)

      stats_params += tuple([('stats.field', field) for field in fields])
      return self._get_json(self._root.get('%(core)s/select' % {'core': core}, params=stats_params))
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))

  def terms(self, core, field, properties=None):
    """Query the Terms component for `field`; `properties` adds raw params."""
    try:
      term_params = self._get_params() + (
          ('wt', 'json'),
          ('rows', 0),
          ('terms.fl', field),
      )
      for key, val in (properties or {}).items():
        term_params += ((key, val),)

      return self._get_json(self._root.get('%(core)s/terms' % {'core': core}, params=term_params))
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))


  def info_system(self):
    """Return Solr admin/info/system details."""
    try:
      response = self._root.get('admin/info/system', params=self._get_params() + (('wt', 'json'),))
      return self._get_json(response)
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))


  def sql(self, collection, statement):
    """Execute a Solr SQL statement against `collection`.

    A LIMIT clause is appended when the statement lacks one, because the
    /sql handler does not honor the `rows` parameter.

    Fix: the original sent a duplicate, contradictory `rows` parameter
    (('rows', 0) immediately followed by ('rows', 100)); the stray
    ('rows', 0) is dropped so only one value is transmitted.
    """
    try:
      if 'limit' not in statement.lower(): # rows is not supported
        statement = statement + ' LIMIT 100'

      params = self._get_params() + (
          ('wt', 'json'),
          ('stmt', statement),
          ('rows', 100),
          ('start', 0),
      )

      response = self._root.get('%(collection)s/sql' % {'collection': collection}, params=params)
      return self._get_json(response)
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))

  def get(self, core, doc_id):
    """Fetch a single document by id through the RealTime Get handler."""
    collection_name = core['name']
    try:
      query = self._get_params() + (
          ('id', doc_id),
          ('wt', 'json'),
      )
      return self._get_json(self._root.get('%(core)s/get' % {'core': collection_name}, params=query))
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))


  def export(self, name, query, fl, sort, rows=100):
    """Run the /export handler on `name` with the given q/fl/sort/rows."""
    try:
      export_params = self._get_params() + (
          ('q', query),
          ('fl', fl),
          ('sort', sort),
          ('rows', rows),
          ('wt', 'json'),
      )
      return self._get_json(self._root.get('%(name)s/export' % {'name': name}, params=export_params))
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))


  def update(self, collection_or_core_name, data, content_type='csv', version=None, **kwargs):
    """POST documents (csv or json `data`) to the update handler with commit=true."""
    if content_type == 'csv':
      content_type = 'application/csv'
    elif content_type == 'json':
      content_type = 'application/json'
    else:
      LOG.error("Trying to update collection  %s with content type %s. Allowed content types: csv/json" % (collection_or_core_name, content_type))

    update_params = self._get_params() + (
        ('wt', 'json'),
        ('overwrite', 'true'),
        ('commit', 'true'),
    )
    if version is not None:
      update_params += (
        ('_version_', version),
        ('versions', 'true')
      )
    if kwargs:
      update_params += tuple(((key, val) for key, val in kwargs.items()))

    response = self._root.post('%s/update' % collection_or_core_name, contenttype=content_type, params=update_params, data=data)
    return self._get_json(response)


  # Deprecated
  def aliases(self):
    """Return collection aliases, read straight from ZooKeeper's /aliases.json."""
    try:
      params = self._get_params() + ( # Waiting for SOLR-4968
          ('detail', 'true'),
          ('path', '/aliases.json'),
      )
      znode = self._root.get('zookeeper', params=params)['znode']
      aliases = json.loads(znode.get('data', '{}'))
      return aliases.get('collection', {})
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))


  # Deprecated
  def create_collection(self, name, shards=1, replication=1):
    """Create a SolrCloud collection; return True on success, False otherwise."""
    try:
      create_params = (
        ('action', 'CREATE'),
        ('name', name),
        ('numShards', shards),
        ('replicationFactor', replication),
        ('collection.configName', name),
        ('wt', 'json')
      )

      response = self._root.post('admin/collections', params=self._get_params() + create_params, contenttype='application/json')
      if 'success' not in response:
        LOG.error("Could not create collection. Check response:\n%s" % json.dumps(response, indent=2))
        return False
      return True
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))


  # Deprecated
  def remove_collection(self, name):
    """Delete a SolrCloud collection; return True on success, False otherwise."""
    try:
      delete_params = (
        ('action', 'DELETE'),
        ('name', name),
        ('wt', 'json')
      )

      response = self._root.post('admin/collections', params=self._get_params() + delete_params, contenttype='application/json')
      if 'success' not in response:
        LOG.error("Could not remove collection. Check response:\n%s" % json.dumps(response, indent=2))
        return False
      return True
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))


  def _get_params(self):
    if self.security_enabled:
      return (('doAs', self._user ),)
    return (('user.name', SERVER_USER.get()), ('doAs', self._user),)

  def _get_q(self, query):
    q_template = '(%s)' if len(query['qs']) >= 2 else '%s'
    return 'OR'.join([q_template % (q['q'] or EMPTY_QUERY.get()) for q in query['qs']]).encode('utf-8')

  @classmethod
  def _get_aggregate_function(cls, facet):
    f = facet['aggregate']

    if f['function'] == 'formula':
      return f['formula']
    elif f['function'] == 'field':
      return f['value']
    else:
      fields = [facet['field']]
      if f['function'] == 'median':
        f['function'] = 'percentile'
        fields.append('50')
      elif f['function'] == 'percentile':
        fields.append(str(f['percentile']))
        f['function'] = 'percentile'
      return '%s(%s)' % (f['function'], ','.join(fields))

  def _get_range_borders(self, collection, query):
    props = {}

    time_field = collection['timeFilter'].get('field')

    if time_field and (collection['timeFilter']['value'] != 'all' or collection['timeFilter']['type'] == 'fixed'):
      # fqs overrides main time filter
      # No longer override
      props['time_filter_overrides'] = []
      props['time_field'] = time_field

      if collection['timeFilter']['type'] == 'rolling':
        props['field'] = collection['timeFilter']['field']
        props['from'] = 'NOW-%s' % collection['timeFilter']['value']
        props['to'] = 'NOW'
        props['gap'] = GAPS.get(collection['timeFilter']['value'])
      elif collection['timeFilter']['type'] == 'fixed':
        props['field'] = collection['timeFilter']['field']
        props['from'] = collection['timeFilter'].get('from', 'NOW-7DAYS')
        props['to'] = collection['timeFilter'].get('to', 'NOW')
        props['fixed'] = True

    return props

  def _get_time_filter_query(self, timeFilter, facet, collection):
    """Compute min/max/start/end/gap strings for a time-range facet.

    When a main timeFilter is active its from/to bound the domain; otherwise
    the field's min/max are fetched live via self.stats. NOTE: mutates
    properties['start'/'end'] (resetting them to None when the user has not
    zoomed), and relies on _compute_range_facet to fill `props`.
    """
    # `properties` may be the facet itself (older facet payloads have no 'properties' key).
    properties = facet.get('properties', facet)
    if timeFilter:
      props = {}
      # If the start & end are equal to min/max, then we want to show the whole domain (either interval now-x or static)
      # In that case use timeFilter values
      if properties['start'] == properties['min'] and properties['end'] == properties['max']:
        stat_facet = {'min': timeFilter['from'], 'max': timeFilter['to']}
        properties['start'] = None
        properties['end'] = None
      else: # The user has zoomed in. Only show that section.
        stat_facet = {'min': properties['min'], 'max': properties['max']}
      _compute_range_facet(facet['widgetType'], stat_facet, props, properties['start'], properties['end'],
                           SLOTS=properties['slot'])
      gap = props['gap']  # NOTE(review): unused local, kept for parity with the sibling branch
      # All values are stringified so they can be embedded in Solr request parameters.
      return {
        'min': '%(min)s' % props,
        'max': '%(max)s' % props,
        'start': '%(start)s' % props,
        'end': '%(end)s' % props,
        'gap': '%(gap)s' % props,
      }
    else:
      props = {}
      # If the start & end are equal to min/max, then we want to show the whole domain. Since min/max can change, we fetch latest values and update start/end
      if properties['start'] == properties['min'] and properties['end'] == properties['max']:
        stats_json = self.stats(collection['name'], [facet['field']])
        stat_facet = stats_json['stats']['stats_fields'][facet['field']]
        properties['start'] = None
        properties['end'] = None
      else: # the user has zoomed in. Only show that section.
        stat_facet = {'min': properties['min'], 'max': properties['max']}
      _compute_range_facet(facet['widgetType'], stat_facet, props, properties['start'], properties['end'], SLOTS = properties['slot'])
      return {
        'start': '%(start)s' % props,
        'end': '%(end)s' % props,
        'gap': '%(gap)s' % props,
        'min': '%(min)s' % props,
        'max': '%(max)s' % props,
      }

  def _get_fq(self, collection, query):
    """Build the tuple of ('fq', value) filter-query parameters for a dashboard query.

    Combines: the main time filter (unless overridden), the user's facet
    filters merged per (type, field), and any nested-document filters.
    Each fq is tagged {!tag=<facet id>} so facets can exclude their own filter.
    """
    params = ()
    timeFilter = {}

    if collection:
      timeFilter = self._get_range_borders(collection, query)
    if timeFilter and not timeFilter.get('time_filter_overrides'):
      params += (('fq', urllib_unquote(utf_quoter('%(field)s:[%(from)s TO %(to)s]' % timeFilter))),)

    # Merge facets queries on same fields
    grouped_fqs = groupby(query['fqs'], lambda x: (x['type'], x['field']))
    merged_fqs = []
    for key, group in grouped_fqs:
      field_fq = next(group)
      for fq in group:
        for f in fq['filter']:
          field_fq['filter'].append(f)
      merged_fqs.append(field_fq)

    for fq in merged_fqs:
      if fq['type'] == 'field':
        fields = fq['field'] if type(fq['field']) == list else [fq['field']] # 2D facets support
        for field in fields:
          f = []
          for _filter in fq['filter']:
            values = _filter['value'] if type(_filter['value']) == list else [_filter['value']] # 2D facets support
            if fields.index(field) < len(values): # Lowest common field denominator
              value = values[fields.index(field)]
              if value or value is False:
                exclude = '-' if _filter['exclude'] else ''
                # Values containing spaces must be phrase-quoted; others use {!field} for exact match.
                if value is not None and ' ' in force_unicode(value):
                  value = force_unicode(value).replace('"', '\\"')
                  f.append('%s%s:"%s"' % (exclude, field, value))
                else:
                  f.append('%s{!field f=%s}%s' % (exclude, field, value))
              else: # Handle empty value selection that are returned using solr facet.missing
                value = "*"
                exclude = '-'
                f.append('%s%s:%s' % (exclude, field, value))
          _params ='{!tag=%(id)s}' % fq + ' '.join(f)
          params += (('fq', urllib_unquote(utf_quoter(_params))),)
      elif fq['type'] == 'range':
        # Half-open interval [from TO to} per selected range bucket.
        params += (('fq', '{!tag=%(id)s}' % fq + ' '.join([urllib_unquote(
                    utf_quoter('%s%s:[%s TO %s}' % ('-' if field['exclude'] else '', fq['field'], f['from'], f['to']))) for field, f in zip(fq['filter'], fq['properties'])])),)
      elif fq['type'] == 'range-up':
        # One-sided range: [from TO *} when filtering upwards, [* TO from} otherwise.
        params += (('fq', '{!tag=%(id)s}' % fq + ' '.join([urllib_unquote(
                    utf_quoter('%s%s:[%s TO %s}' % ('-' if field['exclude'] else '', fq['field'], f['from'] if fq['is_up'] else '*', '*' if fq['is_up'] else f['from'])))
                                                          for field, f in zip(fq['filter'], fq['properties'])])),)
      elif fq['type'] == 'map':
        # Bounding-box filter built from the map widget's SW/NE corners.
        _keys = fq.copy()
        _keys.update(fq['properties'])
        params += (('fq', '{!tag=%(id)s}' % fq + urllib_unquote(
                    utf_quoter('%(lat)s:[%(lat_sw)s TO %(lat_ne)s} AND %(lon)s:[%(lon_sw)s TO %(lon_ne)s}' % _keys))),)

    nested_fields = self._get_nested_fields(collection)
    if nested_fields:
      params += (('fq', urllib_unquote(utf_quoter(' OR '.join(nested_fields)))),)

    return params


  def _get_dimension_aggregates(self, facets):
    aggregates = []
    for agg in facets:
      if agg['aggregate']['function'] != 'count':
        aggregates.append(agg)
      else:
        return aggregates
    return aggregates


  def _get_nested_fields(self, collection):
    if collection and collection.get('nested') and collection['nested']['enabled']:
      return [field['filter'] for field in self._flatten_schema(collection['nested']['schema']) if field['selected']]
    else:
      return []


  def _flatten_schema(self, level):
    fields = []
    for field in level:
      fields.append(field)
      if field['values']:
        fields.extend(self._flatten_schema(field['values']))
    return fields


  @classmethod
  def _get_json(cls, response):
    if type(response) != dict:
      # Got 'plain/text' mimetype instead of 'application/json'
      try:
        response = json.loads(response)
      except ValueError as e:
        # Got some null bytes in the response
        LOG.error('%s: %s' % (new_str(e), repr(response)))
        response = json.loads(response.replace('\x00', ''))
    return response


  def uniquekey(self, collection):
    """Fetch the uniqueKey field name from the collection's schema."""
    try:
      params = self._get_params()
      params += (('wt', 'json'),)
      response = self._root.get('%s/schema/uniquekey' % collection, params=params)
      return self._get_json(response)['uniqueKey']
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))
Example #34
0
class SolrApi(object):
  """
  Client for the Solr REST API used by dashboards.

  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler

  Fixes over the previous version:
  - `except RestException, e:` (Python-2-only syntax, a SyntaxError on Python 3)
    replaced with `except ... as e`.
  - `urllib.unquote` (removed in Python 3) replaced with the `urllib_unquote`
    compatibility helper already used by the sibling SolrApi implementation.
  - `filter(...)` result was truth-tested and indexed, which only works on
    Python 2 lists; replaced with a list comprehension.
  - Operator precedence bug: `set(fieldsSelected + [idField] if idField else [])`
    discarded every selected field when idField was falsy.
  - `self._get_json` was called but never defined on this class; added.
  - The ~220-line GAPS literal is now a compact class-level table expanded on demand.
  """

  # For each rolling window: (coeff, unit) for the dense widgets
  # (histogram/bucket/bar, ~100 slots) then for the coarser facet widget (~10 slots).
  _GAP_SPECS = {
    '5MINUTES': (('+3', 'SECONDS'), ('+1', 'MINUTES')),
    '30MINUTES': (('+20', 'SECONDS'), ('+5', 'MINUTES')),
    '1HOURS': (('+30', 'SECONDS'), ('+10', 'MINUTES')),
    '12HOURS': (('+7', 'MINUTES'), ('+1', 'HOURS')),
    '1DAYS': (('+15', 'MINUTES'), ('+3', 'HOURS')),
    '2DAYS': (('+30', 'MINUTES'), ('+6', 'HOURS')),
    '7DAYS': (('+3', 'HOURS'), ('+1', 'DAYS')),
    '1MONTHS': (('+12', 'HOURS'), ('+5', 'DAYS')),
    '3MONTHS': (('+1', 'DAYS'), ('+30', 'DAYS')),
    '1YEARS': (('+3', 'DAYS'), ('+12', 'MONTHS')),
    '2YEARS': (('+7', 'DAYS'), ('+3', 'MONTHS')),
    '10YEARS': (('+1', 'MONTHS'), ('+1', 'YEARS')),
  }

  def __init__(self, solr_url, user, security_enabled=SECURITY_ENABLED.get(), ssl_cert_ca_verify=SSL_CERT_CA_VERIFY.get()):
    self._url = solr_url
    self._user = user
    self._client = HttpClient(self._url, logger=LOG)
    self.security_enabled = security_enabled

    if self.security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

    self._root = resource.Resource(self._client)

    # The Kerberos handshake requires two requests in order to authenticate,
    # but if our first request is a PUT/POST, it might flat-out reject the
    # first request if the body is too large. So, connect here in order to get
    # a cookie so future PUT/POSTs will be pre-authenticated.
    if self.security_enabled:
      self._root.invoke('HEAD', '/')

  def _get_params(self):
    """Base request parameters: impersonation (doAs), plus user.name when unsecured."""
    if self.security_enabled:
      return (('doAs', self._user),)
    return (('user.name', SERVER_USER.get()), ('doAs', self._user),)

  def _get_q(self, query):
    """Build the main Solr 'q' value by OR-ing every sub-query, utf-8 encoded."""
    q_template = '(%s)' if len(query['qs']) >= 2 else '%s'
    return 'OR'.join([q_template % (q['q'] or EMPTY_QUERY.get()) for q in query['qs']]).encode('utf-8')

  def _get_aggregate_function(self, facet):
    """Translate a facet aggregate name into a Solr function string
    ('median' becomes percentile(field,50))."""
    props = {
      'field': facet['field'],
      'aggregate': facet['properties']['aggregate'] if 'properties' in facet else facet['aggregate']
    }

    if props['aggregate'] == 'median':
      return 'percentile(%(field)s,50)' % props
    else:
      return '%(aggregate)s(%(field)s)' % props

  @classmethod
  def _expand_gap(cls, value):
    """Expand the compact _GAP_SPECS entry for `value` into the per-widget
    gap dict the widgets expect, or None for an unknown window."""
    spec = cls._GAP_SPECS.get(value)
    if spec is None:
      return None
    (coeff, unit), (facet_coeff, facet_unit) = spec
    return {
      'histogram-widget': {'coeff': coeff, 'unit': unit},
      'bucket-widget': {'coeff': coeff, 'unit': unit},
      'bar-widget': {'coeff': coeff, 'unit': unit},
      'facet-widget': {'coeff': facet_coeff, 'unit': facet_unit},
    }

  def _get_range_borders(self, collection, query):
    """Derive the dashboard's main time window (field, from/to, gap) from its
    timeFilter; fqs on the time field are recorded as overrides."""
    props = {}

    time_field = collection['timeFilter'].get('field')

    if time_field and (collection['timeFilter']['value'] != 'all' or collection['timeFilter']['type'] == 'fixed'):
      # fqs overrides main time filter
      fq_time_ids = [fq['id'] for fq in query['fqs'] if fq['field'] == time_field]
      props['time_filter_overrides'] = fq_time_ids
      props['time_field'] = time_field

      if collection['timeFilter']['type'] == 'rolling':
        props['field'] = collection['timeFilter']['field']
        props['from'] = 'NOW-%s' % collection['timeFilter']['value']
        props['to'] = 'NOW'
        props['gap'] = self._expand_gap(collection['timeFilter']['value'])
      elif collection['timeFilter']['type'] == 'fixed':
        props['field'] = collection['timeFilter']['field']
        props['from'] = collection['timeFilter']['from']
        props['to'] = collection['timeFilter']['to']
        props['fixed'] = True

    return props

  def _get_time_filter_query(self, timeFilter, facet):
    """Compute start/end/gap strings for a time-range facet, rounding the
    bounds down to the gap's unit (Solr date math '/UNIT')."""
    if 'fixed' in timeFilter:
      props = {}
      stat_facet = {'min': timeFilter['from'], 'max': timeFilter['to']}
      _compute_range_facet(facet['widgetType'], stat_facet, props, stat_facet['min'], stat_facet['max'])
      unit = re.split(r'\d+', props['gap'])[1]
      return {
        'start': '%(from)s/%(unit)s' % {'from': timeFilter['from'], 'unit': unit},
        'end': '%(to)s/%(unit)s' % {'to': timeFilter['to'], 'unit': unit},
        'gap': '%(gap)s' % props,  # add a 'auto'
      }
    else:
      gap = timeFilter['gap'][facet['widgetType']]
      return {
        'start': '%(from)s/%(unit)s' % {'from': timeFilter['from'], 'unit': gap['unit']},
        'end': '%(to)s/%(unit)s' % {'to': timeFilter['to'], 'unit': gap['unit']},
        'gap': '%(coeff)s%(unit)s/%(unit)s' % gap,  # add a 'auto'
      }

  def _get_fq(self, collection, query):
    """Build the tuple of ('fq', value) filter-query parameters: the main time
    filter (unless overridden) plus the user's facet filters merged per
    (type, field). Each fq is tagged {!tag=<facet id>}."""
    params = ()
    timeFilter = {}

    if collection:
      timeFilter = self._get_range_borders(collection, query)
    if timeFilter and not timeFilter.get('time_filter_overrides'):
      params += (('fq', urllib_unquote(utf_quoter('%(field)s:[%(from)s TO %(to)s]' % timeFilter))),)

    # Merge facets queries on same fields
    grouped_fqs = groupby(query['fqs'], lambda x: (x['type'], x['field']))
    merged_fqs = []
    for key, group in grouped_fqs:
      field_fq = next(group)
      for fq in group:
        for f in fq['filter']:
          field_fq['filter'].append(f)
      merged_fqs.append(field_fq)

    for fq in merged_fqs:
      if fq['type'] == 'field':
        fields = fq['field'] if type(fq['field']) == list else [fq['field']]  # 2D facets support
        for field in fields:
          f = []
          for _filter in fq['filter']:
            values = _filter['value'] if type(_filter['value']) == list else [_filter['value']]  # 2D facets support
            if fields.index(field) < len(values):  # Lowest common field denominator
              value = values[fields.index(field)]
              exclude = '-' if _filter['exclude'] else ''
              # Values containing spaces must be phrase-quoted; others use {!field} for exact match.
              if value is not None and ' ' in force_unicode(value):
                value = force_unicode(value).replace('"', '\\"')
                f.append('%s%s:"%s"' % (exclude, field, value))
              else:
                f.append('%s{!field f=%s}%s' % (exclude, field, value))
          _params = '{!tag=%(id)s}' % fq + ' '.join(f)
          params += (('fq', urllib_unquote(utf_quoter(_params))),)
      elif fq['type'] == 'range':
        # Half-open interval [from TO to} per selected range bucket.
        params += (('fq', '{!tag=%(id)s}' % fq + ' '.join([
          urllib_unquote(utf_quoter('%s%s:[%s TO %s}' % (
            '-' if field['exclude'] else '', fq['field'], f['from'], f['to'])))
          for field, f in zip(fq['filter'], fq['properties'])
        ])),)
      elif fq['type'] == 'range-up':
        # One-sided range: [from TO *} when filtering upwards, [* TO from} otherwise.
        params += (('fq', '{!tag=%(id)s}' % fq + ' '.join([
          urllib_unquote(utf_quoter('%s%s:[%s TO %s}' % (
            '-' if field['exclude'] else '', fq['field'],
            f['from'] if fq['is_up'] else '*', '*' if fq['is_up'] else f['from'])))
          for field, f in zip(fq['filter'], fq['properties'])
        ])),)
      elif fq['type'] == 'map':
        # Bounding-box filter built from the map widget's SW/NE corners.
        _keys = fq.copy()
        _keys.update(fq['properties'])
        params += (('fq', '{!tag=%(id)s}' % fq + urllib_unquote(
          utf_quoter('%(lat)s:[%(lat_sw)s TO %(lat_ne)s} AND %(lon)s:[%(lon_sw)s TO %(lon_ne)s}' % _keys))),)

    return params

  def query(self, collection, query):
    """Run a dashboard query: build q/facet/fq/fl/hl/sort parameters and return
    the parsed JSON response from the collection's select handler."""
    solr_query = {}

    solr_query['collection'] = collection['name']

    if query.get('download'):
      solr_query['rows'] = 1000
      solr_query['start'] = 0
    else:
      solr_query['rows'] = int(collection['template']['rows'] or 10)
      solr_query['start'] = int(query['start'])

    # Hard caps to protect the backend from runaway page sizes/offsets.
    solr_query['rows'] = min(solr_query['rows'], 1000)
    solr_query['start'] = min(solr_query['start'], 10000)

    params = self._get_params() + (
      ('q', self._get_q(query)),
      ('wt', 'json'),
      ('rows', solr_query['rows']),
      ('start', solr_query['start']),
    )

    if any(collection['facets']):
      params += (
        ('facet', 'true'),
        ('facet.mincount', 0),
        ('facet.limit', 10),
      )
      json_facets = {}

      timeFilter = self._get_range_borders(collection, query)

      for facet in collection['facets']:
        if facet['type'] == 'query':
          params += (('facet.query', '%s' % facet['field']),)
        elif facet['type'] == 'range' or facet['type'] == 'range-up':
          keys = {
            'id': '%(id)s' % facet,
            'field': facet['field'],
            'key': '%(field)s-%(id)s' % facet,
            'start': facet['properties']['start'],
            'end': facet['properties']['end'],
            'gap': facet['properties']['gap'],
            'mincount': int(facet['properties']['mincount'])
          }

          # Align the facet's range with the main time filter, unless the user
          # zoomed this particular histogram (override on its own field).
          if timeFilter and timeFilter['time_field'] == facet['field'] and \
              (facet['id'] not in timeFilter['time_filter_overrides'] or facet['widgetType'] != 'histogram-widget'):
            keys.update(self._get_time_filter_query(timeFilter, facet))

          params += ((
            'facet.range',
            '{!key=%(key)s ex=%(id)s f.%(field)s.facet.range.start=%(start)s f.%(field)s.facet.range.end=%(end)s '
            'f.%(field)s.facet.range.gap=%(gap)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys),)
        elif facet['type'] == 'field':
          keys = {
            'id': '%(id)s' % facet,
            'field': facet['field'],
            'key': '%(field)s-%(id)s' % facet,
            # Fetch one extra row for facet widgets so pagination can detect a next page.
            'limit': int(facet['properties'].get('limit', 10)) + (1 if facet['widgetType'] == 'facet-widget' else 0),
            'mincount': int(facet['properties']['mincount'])
          }
          params += ((
            'facet.field',
            '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys),)
        elif facet['type'] == 'nested':
          _f = {
            'field': facet['field'],
            'limit': int(facet['properties'].get('limit', 10)) + (1 if facet['widgetType'] == 'facet-widget' else 0),
            'mincount': int(facet['properties']['mincount'])
          }

          if 'start' in facet['properties']:
            _f.update({
              'type': 'range',
              'start': facet['properties']['start'],
              'end': facet['properties']['end'],
              'gap': facet['properties']['gap'],
            })
            if timeFilter and timeFilter['time_field'] == facet['field'] and \
                (facet['id'] not in timeFilter['time_filter_overrides'] or facet['widgetType'] != 'bucket-widget'):
              _f.update(self._get_time_filter_query(timeFilter, facet))
          else:
            _f.update({
              'type': 'terms',
              'field': facet['field'],
              'excludeTags': facet['id']
            })

          if facet['properties']['facets']:
            first_sub = facet['properties']['facets'][0]
            if first_sub['aggregate'] == 'count':
              _f['facet'] = {
                'd2': {
                  'type': 'terms',
                  'field': '%(field)s' % first_sub,
                  'limit': int(first_sub.get('limit', 10)),
                  'mincount': int(first_sub['mincount'])
                }
              }
              if len(facet['properties']['facets']) > 1:  # Get 3rd dimension calculation
                _f['facet']['d2']['facet'] = {
                  'd2': self._get_aggregate_function(facet['properties']['facets'][1])
                }
            else:
              _f['facet'] = {
                'd2': self._get_aggregate_function(first_sub)
              }

          json_facets[facet['id']] = _f
        elif facet['type'] == 'function':
          json_facets[facet['id']] = self._get_aggregate_function(facet)
          json_facets['processEmpty'] = True
        elif facet['type'] == 'pivot':
          if facet['properties']['facets'] or facet['widgetType'] == 'map-widget':
            fields = facet['field']
            fields_limits = []
            for f in facet['properties']['facets']:
              fields_limits.append('f.%s.facet.limit=%s' % (f['field'], f['limit']))
              fields_limits.append('f.%s.facet.mincount=%s' % (f['field'], f['mincount']))
              fields += ',' + f['field']
            keys = {
              'id': '%(id)s' % facet,
              'key': '%(field)s-%(id)s' % facet,
              'field': facet['field'],
              'fields': fields,
              'limit': int(facet['properties'].get('limit', 10)),
              'mincount': int(facet['properties']['mincount']),
              'fields_limits': ' '.join(fields_limits)
            }
            params += ((
              'facet.pivot',
              '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s '
              'f.%(field)s.facet.mincount=%(mincount)s %(fields_limits)s}%(fields)s' % keys),)

      if json_facets:
        params += (('json.facet', json.dumps(json_facets)),)

    params += self._get_fq(collection, query)

    if collection['template']['fieldsSelected'] and collection['template']['isGridLayout']:
      # Bug fix: the conditional must only guard the idField element, otherwise
      # a falsy idField used to discard every selected field.
      fields = set(collection['template']['fieldsSelected'] + ([collection['idField']] if collection['idField'] else []))
      # Add field if needed
      if collection['template']['leafletmap'].get('latitudeField'):
        fields.add(collection['template']['leafletmap']['latitudeField'])
      if collection['template']['leafletmap'].get('longitudeField'):
        fields.add(collection['template']['leafletmap']['longitudeField'])
      if collection['template']['leafletmap'].get('labelField'):
        fields.add(collection['template']['leafletmap']['labelField'])
      params += (('fl', urllib_unquote(utf_quoter(','.join(list(fields))))),)
    else:
      params += (('fl', '*'),)

    params += (
      ('hl', 'true'),
      ('hl.fl', '*'),
      ('hl.snippets', 5),
      ('hl.fragsize', 1000),
    )

    if collection['template']['fieldsSelected']:
      fields = []
      for field in collection['template']['fieldsSelected']:
        # Bug fix: filter() returns a lazy iterator on Python 3; it was
        # truth-tested and indexed as if it were a list.
        matching_attrs = [
          attribute for attribute in collection['template']['fieldsAttributes']
          if attribute['name'] == field
        ]
        if matching_attrs and matching_attrs[0]['sort']['direction']:
          fields.append('%s %s' % (field, matching_attrs[0]['sort']['direction']))
      if fields:
        params += (('sort', ','.join(fields)),)

    response = self._root.get('%(collection)s/select' % solr_query, params)
    return self._get_json(response)

  def suggest(self, collection, query):
    """Return suggester results for query['q'], optionally from a named dictionary."""
    try:
      params = self._get_params() + (
        ('suggest', 'true'),
        ('suggest.build', 'true'),
        ('suggest.q', query['q']),
        ('wt', 'json'),
      )
      if query.get('dictionary'):
        params += (('suggest.dictionary', query['dictionary']),)
      response = self._root.get('%s/suggest' % collection, params)
      return self._get_json(response)
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))

  @classmethod
  def _get_json(cls, response):
    """Return the Solr response as a dict, parsing it when the server answered
    with text instead of JSON. (Was called but missing from this class.)"""
    if isinstance(response, dict):
      return response
    try:
      return json.loads(response)
    except ValueError:
      # Got some null bytes in the response
      return json.loads(response.replace('\x00', ''))
Example #35
0
class SolrApi(object):
    """Client that builds and issues Solr search/suggest requests.

    http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
    """

    def __init__(
        self, solr_url, user, security_enabled=SECURITY_ENABLED.get(), ssl_cert_ca_verify=SSL_CERT_CA_VERIFY.get()
    ):
        self._url = solr_url
        self._user = user
        self._client = HttpClient(self._url, logger=LOG)
        self.security_enabled = security_enabled

        if self.security_enabled:
            self._client.set_kerberos_auth()

        self._client.set_verify(ssl_cert_ca_verify)

        self._root = resource.Resource(self._client)

        # The Kerberos handshake requires two requests in order to authenticate,
        # but if our first request is a PUT/POST, it might flat-out reject the
        # first request if the body is too large. So, connect here in order to get
        # a cookie so future PUT/POSTs will be pre-authenticated.
        if self.security_enabled:
            self._root.invoke("HEAD", "/")

    def _get_params(self):
        """Base query params: impersonation via doAs (plus user.name when insecure)."""
        if self.security_enabled:
            return (("doAs", self._user),)
        return (("user.name", DEFAULT_USER), ("doAs", self._user))

    def _get_q(self, query):
        """Join the sub-queries of ``query['qs']`` into a single Solr q string.

        Each sub-query is parenthesized when there are two or more, empty
        sub-queries fall back to EMPTY_QUERY. Returns UTF-8 encoded bytes.
        """
        q_template = "(%s)" if len(query["qs"]) >= 2 else "%s"
        return "OR".join([q_template % (q["q"] or EMPTY_QUERY.get()) for q in query["qs"]]).encode("utf-8")

    def _get_aggregate_function(self, facet):
        """Return the Solr aggregate expression for a facet, e.g. 'sum(price)'.

        'median' is translated to percentile(field,50), which is how Solr
        spells it.
        """
        props = {
            "field": facet["field"],
            "aggregate": facet["properties"]["aggregate"] if "properties" in facet else facet["aggregate"],
        }

        if props["aggregate"] == "median":
            return "percentile(%(field)s,50)" % props
        else:
            return "%(aggregate)s(%(field)s)" % props

    def _get_range_borders(self, collection, query):
        """Compute the time-range window (from/to/gap) of the collection's time filter.

        Returns an empty dict when no time filter applies; otherwise a dict
        with 'time_field', 'time_filter_overrides' (fq ids that override the
        main filter) and either a rolling NOW-based window or fixed borders.
        """
        props = {}
        # Gap per widget type for each rolling-window size (target slot counts
        # noted inline below).
        GAPS = {
            "5MINUTES": {
                "histogram-widget": {"coeff": "+3", "unit": "SECONDS"},  # ~100 slots
                "bucket-widget": {"coeff": "+3", "unit": "SECONDS"},  # ~100 slots
                "bar-widget": {"coeff": "+3", "unit": "SECONDS"},  # ~100 slots
                "facet-widget": {"coeff": "+1", "unit": "MINUTES"},  # ~10 slots
            },
            "30MINUTES": {
                "histogram-widget": {"coeff": "+20", "unit": "SECONDS"},
                "bucket-widget": {"coeff": "+20", "unit": "SECONDS"},
                "bar-widget": {"coeff": "+20", "unit": "SECONDS"},
                "facet-widget": {"coeff": "+5", "unit": "MINUTES"},
            },
            "1HOURS": {
                "histogram-widget": {"coeff": "+30", "unit": "SECONDS"},
                "bucket-widget": {"coeff": "+30", "unit": "SECONDS"},
                "bar-widget": {"coeff": "+30", "unit": "SECONDS"},
                "facet-widget": {"coeff": "+10", "unit": "MINUTES"},
            },
            "12HOURS": {
                "histogram-widget": {"coeff": "+7", "unit": "MINUTES"},
                "bucket-widget": {"coeff": "+7", "unit": "MINUTES"},
                "bar-widget": {"coeff": "+7", "unit": "MINUTES"},
                "facet-widget": {"coeff": "+1", "unit": "HOURS"},
            },
            "1DAYS": {
                "histogram-widget": {"coeff": "+15", "unit": "MINUTES"},
                "bucket-widget": {"coeff": "+15", "unit": "MINUTES"},
                "bar-widget": {"coeff": "+15", "unit": "MINUTES"},
                "facet-widget": {"coeff": "+3", "unit": "HOURS"},
            },
            "2DAYS": {
                "histogram-widget": {"coeff": "+30", "unit": "MINUTES"},
                "bucket-widget": {"coeff": "+30", "unit": "MINUTES"},
                "bar-widget": {"coeff": "+30", "unit": "MINUTES"},
                "facet-widget": {"coeff": "+6", "unit": "HOURS"},
            },
            "7DAYS": {
                "histogram-widget": {"coeff": "+3", "unit": "HOURS"},
                "bucket-widget": {"coeff": "+3", "unit": "HOURS"},
                "bar-widget": {"coeff": "+3", "unit": "HOURS"},
                "facet-widget": {"coeff": "+1", "unit": "DAYS"},
            },
            "1MONTHS": {
                "histogram-widget": {"coeff": "+12", "unit": "HOURS"},
                "bucket-widget": {"coeff": "+12", "unit": "HOURS"},
                "bar-widget": {"coeff": "+12", "unit": "HOURS"},
                "facet-widget": {"coeff": "+5", "unit": "DAYS"},
            },
            "3MONTHS": {
                "histogram-widget": {"coeff": "+1", "unit": "DAYS"},
                "bucket-widget": {"coeff": "+1", "unit": "DAYS"},
                "bar-widget": {"coeff": "+1", "unit": "DAYS"},
                "facet-widget": {"coeff": "+30", "unit": "DAYS"},
            },
            "1YEARS": {
                "histogram-widget": {"coeff": "+3", "unit": "DAYS"},
                "bucket-widget": {"coeff": "+3", "unit": "DAYS"},
                "bar-widget": {"coeff": "+3", "unit": "DAYS"},
                "facet-widget": {"coeff": "+12", "unit": "MONTHS"},
            },
            "2YEARS": {
                "histogram-widget": {"coeff": "+7", "unit": "DAYS"},
                "bucket-widget": {"coeff": "+7", "unit": "DAYS"},
                "bar-widget": {"coeff": "+7", "unit": "DAYS"},
                "facet-widget": {"coeff": "+3", "unit": "MONTHS"},
            },
            "10YEARS": {
                "histogram-widget": {"coeff": "+1", "unit": "MONTHS"},
                "bucket-widget": {"coeff": "+1", "unit": "MONTHS"},
                "bar-widget": {"coeff": "+1", "unit": "MONTHS"},
                "facet-widget": {"coeff": "+1", "unit": "YEARS"},
            },
        }

        time_field = collection["timeFilter"].get("field")

        if time_field and (collection["timeFilter"]["value"] != "all" or collection["timeFilter"]["type"] == "fixed"):
            # fqs overrides main time filter
            fq_time_ids = [fq["id"] for fq in query["fqs"] if fq["field"] == time_field]
            props["time_filter_overrides"] = fq_time_ids
            props["time_field"] = time_field

            if collection["timeFilter"]["type"] == "rolling":
                props["field"] = collection["timeFilter"]["field"]
                props["from"] = "NOW-%s" % collection["timeFilter"]["value"]
                props["to"] = "NOW"
                props["gap"] = GAPS.get(collection["timeFilter"]["value"])
            elif collection["timeFilter"]["type"] == "fixed":
                props["field"] = collection["timeFilter"]["field"]
                props["from"] = collection["timeFilter"]["from"]
                props["to"] = collection["timeFilter"]["to"]
                props["fixed"] = True

        return props

    def _get_time_filter_query(self, timeFilter, facet):
        """Translate a time filter window into Solr range-facet start/end/gap."""
        if "fixed" in timeFilter:
            # Fixed borders: compute a gap from the actual min/max.
            props = {}
            stat_facet = {"min": timeFilter["from"], "max": timeFilter["to"]}
            _compute_range_facet(facet["widgetType"], stat_facet, props, stat_facet["min"], stat_facet["max"])
            gap = props["gap"]
            # Raw string so '\d' is a regex digit class, not an invalid
            # string escape.
            unit = re.split(r"\d+", gap)[1]
            return {
                "start": "%(from)s/%(unit)s" % {"from": timeFilter["from"], "unit": unit},
                "end": "%(to)s/%(unit)s" % {"to": timeFilter["to"], "unit": unit},
                "gap": "%(gap)s" % props,  # add a 'auto'
            }
        else:
            # Rolling window: gap is looked up per widget type.
            gap = timeFilter["gap"][facet["widgetType"]]
            return {
                "start": "%(from)s/%(unit)s" % {"from": timeFilter["from"], "unit": gap["unit"]},
                "end": "%(to)s/%(unit)s" % {"to": timeFilter["to"], "unit": gap["unit"]},
                "gap": "%(coeff)s%(unit)s/%(unit)s" % gap,  # add a 'auto'
            }

    def _get_fq(self, collection, query):
        """Build the ('fq', ...) filter-query params for the main time filter
        and for every facet filter in ``query['fqs']``."""
        params = ()
        timeFilter = {}

        if collection:
            timeFilter = self._get_range_borders(collection, query)
        if timeFilter and not timeFilter.get("time_filter_overrides"):
            params += (("fq", urllib.unquote(utf_quoter("%(field)s:[%(from)s TO %(to)s]" % timeFilter))),)

        # Merge facets queries on same fields
        # NOTE(review): groupby only merges adjacent entries — assumes fqs
        # arrive grouped by (type, field); confirm against caller.
        grouped_fqs = groupby(query["fqs"], lambda x: (x["type"], x["field"]))
        merged_fqs = []
        for key, group in grouped_fqs:
            field_fq = next(group)
            for fq in group:
                for f in fq["filter"]:
                    field_fq["filter"].append(f)
            merged_fqs.append(field_fq)

        for fq in merged_fqs:
            if fq["type"] == "field":
                fields = fq["field"] if type(fq["field"]) == list else [fq["field"]]  # 2D facets support
                for field in fields:
                    f = []
                    for _filter in fq["filter"]:
                        values = (
                            _filter["value"] if type(_filter["value"]) == list else [_filter["value"]]
                        )  # 2D facets support
                        if fields.index(field) < len(values):  # Lowest common field denominator
                            value = values[fields.index(field)]
                            exclude = "-" if _filter["exclude"] else ""
                            if value is not None and " " in force_unicode(value):
                                # Quote values containing spaces; escape embedded quotes.
                                value = force_unicode(value).replace('"', '\\"')
                                f.append('%s%s:"%s"' % (exclude, field, value))
                            else:
                                f.append("%s{!field f=%s}%s" % (exclude, field, value))
                    _params = "{!tag=%(id)s}" % fq + " ".join(f)
                    params += (("fq", urllib.unquote(utf_quoter(_params))),)
            elif fq["type"] == "range":
                params += (
                    (
                        "fq",
                        "{!tag=%(id)s}" % fq
                        + " ".join(
                            [
                                urllib.unquote(
                                    utf_quoter(
                                        "%s%s:[%s TO %s}"
                                        % ("-" if field["exclude"] else "", fq["field"], f["from"], f["to"])
                                    )
                                )
                                for field, f in zip(fq["filter"], fq["properties"])
                            ]
                        ),
                    ),
                )
            elif fq["type"] == "range-up":
                params += (
                    (
                        "fq",
                        "{!tag=%(id)s}" % fq
                        + " ".join(
                            [
                                urllib.unquote(
                                    utf_quoter(
                                        "%s%s:[%s TO %s}"
                                        % (
                                            "-" if field["exclude"] else "",
                                            fq["field"],
                                            f["from"] if fq["is_up"] else "*",
                                            "*" if fq["is_up"] else f["from"],
                                        )
                                    )
                                )
                                for field, f in zip(fq["filter"], fq["properties"])
                            ]
                        ),
                    ),
                )
            elif fq["type"] == "map":
                # Bounding-box filter: lat/lon between the SW and NE corners.
                _keys = fq.copy()
                _keys.update(fq["properties"])
                params += (
                    (
                        "fq",
                        "{!tag=%(id)s}" % fq
                        + urllib.unquote(
                            utf_quoter(
                                "%(lat)s:[%(lat_sw)s TO %(lat_ne)s} AND %(lon)s:[%(lon_sw)s TO %(lon_ne)s}" % _keys
                            )
                        ),
                    ),
                )

        return params

    def query(self, collection, query):
        """Execute a dashboard search: builds q, facets, fq, fl, hl and sort
        params from the collection definition and returns the parsed JSON
        select response."""
        solr_query = {}

        solr_query["collection"] = collection["name"]

        if query.get("download"):
            solr_query["rows"] = 1000
            solr_query["start"] = 0
        else:
            solr_query["rows"] = int(collection["template"]["rows"] or 10)
            solr_query["start"] = int(query["start"])

        # Hard caps to protect Solr from runaway pagination.
        solr_query["rows"] = min(solr_query["rows"], 1000)
        solr_query["start"] = min(solr_query["start"], 10000)

        params = self._get_params() + (
            ("q", self._get_q(query)),
            ("wt", "json"),
            ("rows", solr_query["rows"]),
            ("start", solr_query["start"]),
        )

        if any(collection["facets"]):
            params += (("facet", "true"), ("facet.mincount", 0), ("facet.limit", 10))
            json_facets = {}

            timeFilter = self._get_range_borders(collection, query)

            for facet in collection["facets"]:
                if facet["type"] == "query":
                    params += (("facet.query", "%s" % facet["field"]),)
                elif facet["type"] == "range" or facet["type"] == "range-up":
                    keys = {
                        "id": "%(id)s" % facet,
                        "field": facet["field"],
                        "key": "%(field)s-%(id)s" % facet,
                        "start": facet["properties"]["start"],
                        "end": facet["properties"]["end"],
                        "gap": facet["properties"]["gap"],
                        "mincount": int(facet["properties"]["mincount"]),
                    }

                    if (
                        timeFilter
                        and timeFilter["time_field"] == facet["field"]
                        and (
                            facet["id"] not in timeFilter["time_filter_overrides"]
                            or facet["widgetType"] != "histogram-widget"
                        )
                    ):
                        keys.update(self._get_time_filter_query(timeFilter, facet))

                    params += (
                        (
                            "facet.range",
                            "{!key=%(key)s ex=%(id)s f.%(field)s.facet.range.start=%(start)s f.%(field)s.facet.range.end=%(end)s f.%(field)s.facet.range.gap=%(gap)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s"
                            % keys,
                        ),
                    )
                elif facet["type"] == "field":
                    keys = {
                        "id": "%(id)s" % facet,
                        "field": facet["field"],
                        "key": "%(field)s-%(id)s" % facet,
                        # facet-widget needs one extra row to detect "more".
                        "limit": int(facet["properties"].get("limit", 10))
                        + (1 if facet["widgetType"] == "facet-widget" else 0),
                        "mincount": int(facet["properties"]["mincount"]),
                    }
                    params += (
                        (
                            "facet.field",
                            "{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s"
                            % keys,
                        ),
                    )
                elif facet["type"] == "nested":
                    _f = {
                        "field": facet["field"],
                        "limit": int(facet["properties"].get("limit", 10))
                        + (1 if facet["widgetType"] == "facet-widget" else 0),
                        "mincount": int(facet["properties"]["mincount"]),
                    }

                    if "start" in facet["properties"]:
                        _f.update(
                            {
                                "type": "range",
                                "start": facet["properties"]["start"],
                                "end": facet["properties"]["end"],
                                "gap": facet["properties"]["gap"],
                            }
                        )
                        if (
                            timeFilter
                            and timeFilter["time_field"] == facet["field"]
                            and (
                                facet["id"] not in timeFilter["time_filter_overrides"]
                                or facet["widgetType"] != "bucket-widget"
                            )
                        ):
                            _f.update(self._get_time_filter_query(timeFilter, facet))
                    else:
                        _f.update({"type": "terms", "field": facet["field"], "excludeTags": facet["id"]})

                    if facet["properties"]["facets"]:
                        if facet["properties"]["facets"][0]["aggregate"] == "count":
                            _f["facet"] = {
                                "d2": {
                                    "type": "terms",
                                    "field": "%(field)s" % facet["properties"]["facets"][0],
                                    "limit": int(facet["properties"]["facets"][0].get("limit", 10)),
                                    "mincount": int(facet["properties"]["facets"][0]["mincount"]),
                                }
                            }
                            if len(facet["properties"]["facets"]) > 1:  # Get 3rd dimension calculation
                                _f["facet"]["d2"]["facet"] = {
                                    "d2": self._get_aggregate_function(facet["properties"]["facets"][1])
                                }
                        else:
                            _f["facet"] = {"d2": self._get_aggregate_function(facet["properties"]["facets"][0])}

                    json_facets[facet["id"]] = _f
                elif facet["type"] == "function":
                    json_facets[facet["id"]] = self._get_aggregate_function(facet)
                    json_facets["processEmpty"] = True
                elif facet["type"] == "pivot":
                    if facet["properties"]["facets"] or facet["widgetType"] == "map-widget":
                        fields = facet["field"]
                        fields_limits = []
                        for f in facet["properties"]["facets"]:
                            fields_limits.append("f.%s.facet.limit=%s" % (f["field"], f["limit"]))
                            fields_limits.append("f.%s.facet.mincount=%s" % (f["field"], f["mincount"]))
                            fields += "," + f["field"]
                        keys = {
                            "id": "%(id)s" % facet,
                            "key": "%(field)s-%(id)s" % facet,
                            "field": facet["field"],
                            "fields": fields,
                            "limit": int(facet["properties"].get("limit", 10)),
                            "mincount": int(facet["properties"]["mincount"]),
                            "fields_limits": " ".join(fields_limits),
                        }
                        params += (
                            (
                                "facet.pivot",
                                "{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s %(fields_limits)s}%(fields)s"
                            % keys,
                            ),
                        )

            if json_facets:
                params += (("json.facet", json.dumps(json_facets)),)

        params += self._get_fq(collection, query)

        if collection["template"]["fieldsSelected"] and collection["template"]["isGridLayout"]:
            # Keep the selected fields and add the id field when configured.
            # The previous parenthesization made the conditional swallow the
            # whole concatenation, dropping fieldsSelected whenever idField
            # was falsy.
            fields = set(
                collection["template"]["fieldsSelected"] + ([collection["idField"]] if collection["idField"] else [])
            )
            # Add field if needed
            if collection["template"]["leafletmap"].get("latitudeField"):
                fields.add(collection["template"]["leafletmap"]["latitudeField"])
            if collection["template"]["leafletmap"].get("longitudeField"):
                fields.add(collection["template"]["leafletmap"]["longitudeField"])
            if collection["template"]["leafletmap"].get("labelField"):
                fields.add(collection["template"]["leafletmap"]["labelField"])
            params += (("fl", urllib.unquote(utf_quoter(",".join(list(fields))))),)
        else:
            params += (("fl", "*"),)

        params += (("hl", "true"), ("hl.fl", "*"), ("hl.snippets", 5), ("hl.fragsize", 1000))

        if collection["template"]["fieldsSelected"]:
            fields = []
            for field in collection["template"]["fieldsSelected"]:
                # List comprehension instead of filter() so indexing and
                # truth-testing below behave the same on Python 2 and 3
                # (a Py3 filter object is always truthy and not indexable).
                attribute_field = [
                    attribute for attribute in collection["template"]["fieldsAttributes"] if field == attribute["name"]
                ]
                if attribute_field:
                    if attribute_field[0]["sort"]["direction"]:
                        fields.append("%s %s" % (field, attribute_field[0]["sort"]["direction"]))
            if fields:
                params += (("sort", ",".join(fields)),)

        response = self._root.get("%(collection)s/select" % solr_query, params)
        return self._get_json(response)

    def suggest(self, collection, query):
        """Run the Solr Suggester for ``query['q']`` against ``collection``.

        Raises PopupException if the Solr call fails.
        """
        try:
            params = self._get_params() + (
                ("suggest", "true"),
                ("suggest.build", "true"),
                ("suggest.q", query["q"]),
                ("wt", "json"),
            )
            if query.get("dictionary"):
                params += (("suggest.dictionary", query["dictionary"]),)
            response = self._root.get("%s/suggest" % collection, params)
            return self._get_json(response)
        except RestException as e:  # 'as' form works on both Python 2.6+ and 3
            raise PopupException(e, title=_("Error while accessing Solr"))
Exemple #36
0
class HistoryServerApi(object):
    """Thin REST client for the MapReduce History Server.

    All endpoints are JSON GETs under ``mapreduce/jobs``; the acting user is
    stored per-thread and impersonated via doAs.
    """

    def __init__(self,
                 oozie_url,
                 security_enabled=False,
                 ssl_cert_ca_verify=False):
        self._url = posixpath.join(oozie_url, 'ws/%s/history' % _API_VERSION)
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)
        self._security_enabled = security_enabled
        self._thread_local = threading.local()  # To store user info

        if self._security_enabled:
            self._client.set_kerberos_auth()

        self._client.set_verify(ssl_cert_ca_verify)

    def __str__(self):
        return "HistoryServerApi at %s" % (self._url, )

    def _get_params(self):
        """Impersonation query params for the current thread's user."""
        if self.username == DEFAULT_USER.get():
            return {}
        # We impersonate if needed
        params = {'doAs': self.username}
        if not self._security_enabled:
            params['user.name'] = DEFAULT_USER.get()
        return params

    def _get(self, path):
        """Shared GET helper: identical params/headers for every endpoint."""
        return self._root.get(path,
                              params=self._get_params(),
                              headers={'Accept': _JSON_CONTENT_TYPE})

    @property
    def url(self):
        return self._url

    @property
    def user(self):
        return self.username  # Backward compatibility

    @property
    def username(self):
        try:
            return self._thread_local.user
        except AttributeError:
            # No user bound to this thread yet.
            return DEFAULT_USER.get()

    def setuser(self, user):
        """Bind *user* to this thread; return the previously bound user."""
        previous = self.user
        self._thread_local.user = user
        return previous

    def job(self, user, job_id):
        return self._get('mapreduce/jobs/%(job_id)s' % {'job_id': job_id})

    def counters(self, job_id):
        return self._get('mapreduce/jobs/%(job_id)s/counters' % {'job_id': job_id})

    def conf(self, job_id):
        return self._get('mapreduce/jobs/%(job_id)s/conf' % {'job_id': job_id})

    def job_attempts(self, job_id):
        return self._get('mapreduce/jobs/%(job_id)s/jobattempts' % {'job_id': job_id})

    def tasks(self, job_id):
        return self._get('mapreduce/jobs/%(job_id)s/tasks' % {'job_id': job_id})

    def task(self, job_id, task_id):
        return self._get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s' %
                         {'job_id': job_id, 'task_id': task_id})

    def task_attempts(self, job_id, task_id):
        return self._get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts' %
                         {'job_id': job_id, 'task_id': task_id})

    def task_counters(self, job_id, task_id):
        # Counters are only served under the 'job_...' form of the id.
        job_id = job_id.replace('application', 'job')
        return self._get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/counters' %
                         {'job_id': job_id, 'task_id': task_id})

    def task_attempt(self, job_id, task_id, attempt_id):
        return self._get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s' %
                         {'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id})

    def task_attempt_counters(self, job_id, task_id, attempt_id):
        return self._get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s/counters' %
                         {'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id})
Exemple #37
0
class OozieApi(object):
  """REST client for the Oozie server (workflows, coordinators, bundles)."""

  def __init__(self, oozie_url, user, security_enabled=False, api_version=API_VERSION, ssl_cert_ca_verify=True):
    self._url = posixpath.join(oozie_url, api_version)
    self._client = HttpClient(self._url, logger=LOG)

    if security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    # To store username info
    if hasattr(user, 'username'):
      self.user = user.username
    else:
      self.user = user
    self.api_version = api_version

  def __str__(self):
    return "OozieApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def _get_params(self):
    """Base query params: impersonation (doAs) plus the configured timezone."""
    if self.security_enabled:
      return {'doAs': self.user, 'timezone': TIME_ZONE.get()}
    return {'user.name': DEFAULT_USER, 'doAs': self.user, 'timezone': TIME_ZONE.get()}

  def _get_oozie_properties(self, properties=None):
    """Job properties with user.name defaulted; *properties* overrides."""
    defaults = {
      'user.name': self.user,
    }

    if properties is not None:
      defaults.update(properties)

    return defaults

  VALID_JOB_FILTERS = ('name', 'user', 'group', 'status', 'startcreatedtime')

  def get_jobs(self, jobtype, offset=None, cnt=None, filters=None):
    """
    Get a list of Oozie jobs.

    Note that offset is 1-based.
    filters is an iterable of (key, value) pairs; key must be one of
    VALID_JOB_FILTERS: name, user, group, status, startcreatedtime.
    Raises ValueError on an unknown filter key.
    """
    params = self._get_params()
    if offset is not None:
      params['offset'] = str(offset)
    if cnt is not None:
      params['len'] = str(cnt)
    if filters is None:
      filters = []
    params['jobtype'] = jobtype

    filter_list = []
    for key, val in filters:
      if key not in OozieApi.VALID_JOB_FILTERS:
        raise ValueError('"%s" is not a valid filter for selecting jobs' % (key,))
      filter_list.append('%s=%s' % (key, val))
    params['filter'] = ';'.join(filter_list)

    # Send the request and wrap the response in the matching list type.
    resp = self._root.get('jobs', params)
    if jobtype == 'wf':
      wf_list = WorkflowList(self, resp, filters=filters)
    elif jobtype == 'coord':
      wf_list = CoordinatorList(self, resp, filters=filters)
    else:
      wf_list = BundleList(self, resp, filters=filters)
    return wf_list

  def get_workflows(self, offset=None, cnt=None, filters=None):
    return self.get_jobs('wf', offset, cnt, filters)

  def get_coordinators(self, offset=None, cnt=None, filters=None):
    return self.get_jobs('coord', offset, cnt, filters)

  def get_bundles(self, offset=None, cnt=None, filters=None):
    return self.get_jobs('bundle', offset, cnt, filters)

  # TODO: make get_job accept any jobid
  def get_job(self, jobid):
    """
    get_job(jobid) -> Workflow
    """
    params = self._get_params()
    resp = self._root.get('job/%s' % (jobid,), params)
    wf = Workflow(self, resp)
    return wf

  def get_coordinator(self, jobid, offset=None, cnt=None, filters=None):
    """Fetch one coordinator; filters is an iterable of (key, value) pairs."""
    params = self._get_params()
    if offset is not None:
      params['offset'] = str(offset)
    if cnt is not None:
      params['len'] = str(cnt)
    if filters is None:
      # List of pairs, consistent with get_jobs (the loop below unpacks
      # (key, val) tuples, which a dict default would not provide).
      filters = []
    params.update({'order': 'desc'})

    filter_list = []
    for key, val in filters:
      if key not in OozieApi.VALID_JOB_FILTERS:
        raise ValueError('"%s" is not a valid filter for selecting jobs' % (key,))
      filter_list.append('%s=%s' % (key, val))
    params['filter'] = ';'.join(filter_list)

    resp = self._root.get('job/%s' % (jobid,), params)
    return Coordinator(self, resp)

  def get_bundle(self, jobid):
    params = self._get_params()
    resp = self._root.get('job/%s' % (jobid,), params)
    return Bundle(self, resp)

  def get_job_definition(self, jobid):
    """
    get_job_definition(jobid) -> Definition (xml string)
    """
    params = self._get_params()
    params['show'] = 'definition'
    xml = self._root.get('job/%s' % (jobid,), params)
    return xml

  def get_job_log(self, jobid):
    """
    get_job_log(jobid) -> Log (xml string)
    """
    params = self._get_params()
    params['show'] = 'log'
    xml = self._root.get('job/%s' % (jobid,), params)
    return xml

  def get_action(self, action_id):
    """Fetch a single action; the id infix selects the wrapper class."""
    if 'C@' in action_id:
      Klass = CoordinatorAction
    elif 'B@' in action_id:
      Klass = BundleAction
    else:
      Klass = WorkflowAction
    params = self._get_params()
    resp = self._root.get('job/%s' % (action_id,), params)
    return Klass(resp)

  def job_control(self, jobid, action, properties=None, parameters=None):
    """
    job_control(jobid, action) -> None
    Raise RestException on error.
    """
    if action not in ('start', 'suspend', 'resume', 'kill', 'rerun', 'coord-rerun', 'bundle-rerun', 'change'):
      msg = 'Invalid oozie job action: %s' % (action,)
      LOG.error(msg)
      raise ValueError(msg)
    properties = self._get_oozie_properties(properties)
    params = self._get_params()
    params['action'] = action
    if parameters is not None:
      params.update(parameters)

    return self._root.put('job/%s' % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def submit_workflow(self, application_path, properties=None):
    """
    submit_workflow(application_path, properties=None) -> jobid

    Raise RestException on error.
    """
    defaults = {
      'oozie.wf.application.path': application_path,
      'user.name': self.user,
    }

    if properties is not None:
      defaults.update(properties)
    properties = defaults

    return self.submit_job(properties)

  # Is name actually submit_coord?
  def submit_job(self, properties=None):
    """
    submit_job(properties=None, id=None) -> jobid

    Raise RestException on error.
    """
    defaults = {
      'user.name': self.user,
    }

    if properties is not None:
      defaults.update(properties)

    properties = defaults

    params = self._get_params()
    resp = self._root.post('jobs', params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)
    return resp['id']

  def rerun(self, jobid, properties=None, params=None):
    """Rerun a job; caller-supplied *params* override the base params."""
    properties = self._get_oozie_properties(properties)
    if params is None:
      params = self._get_params()
    else:
      # Merge base params with the caller's overrides. The previous code
      # updated a temporary dict and discarded the result, so the base
      # doAs/timezone params were silently lost.
      merged = self._get_params()
      merged.update(params)
      params = merged

    params['action'] = 'rerun'

    return self._root.put('job/%s' % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def get_build_version(self):
    """
    get_build_version() -> Build version (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/build-version', params)
    return resp

  def get_instrumentation(self):
    params = self._get_params()
    resp = self._root.get('admin/instrumentation', params)
    return resp

  def get_metrics(self):
    params = self._get_params()
    resp = self._root.get('admin/metrics', params)
    return resp

  def get_configuration(self):
    """
    get_configuration() -> Oozie config (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/configuration', params)
    return resp

  def get_oozie_status(self):
    """
    get_oozie_status() -> Oozie status (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/status', params)
    return resp

  def get_oozie_slas(self, **kwargs):
    """
    filter=
      app_name=my-sla-app
      id=0000002-131206135002457-oozie-oozi-W
      nominal_start=2013-06-18T00:01Z
      nominal_end=2013-06-23T00:01Z
    """
    params = self._get_params()
    # items() works identically here on Python 2 and 3 (iteritems() is Py2-only).
    params['filter'] = ';'.join(['%s=%s' % (key, val) for key, val in kwargs.items()])
    resp = self._root.get('sla', params)
    return resp['slaSummaryList']
Exemple #38
0
class DataWarehouse2Api(object):
  """Thin client for the Data Warehouse (K8s) cluster REST API."""

  def __init__(self, user=None):
    self.user = user
    self._api_url = '%s/dw' % K8S.API_URL.get().rstrip('/')
    self._client = HttpClient(self._api_url, logger=LOG)
    self._client.set_verify(False)
    self._root = Resource(self._client)


  def _decorate_cluster(self, cluster, instance_type):
    # Normalize the raw API payload into the fields callers expect.
    cluster['clusterName'] = cluster['name']
    cluster['workersGroupSize'] = cluster['workerReplicas']
    cluster['instanceType'] = instance_type
    cluster['progress'] = '%(workerReplicasOnline)s / %(workerReplicas)s' % cluster
    cluster['creationDate'] = str(datetime.now())


  def list_k8_clusters(self):
    """List clusters, labeling each with its CPU/memory-based instance type."""
    response = self._root.post('listClusters', contenttype="application/json")
    for cluster in response['clusters']:
      self._decorate_cluster(cluster, '%(workerCpuCores)s CPU %(workerMemoryInGib)s Memory' % cluster)
    return response


  def create_cluster(self, cloud_provider, cluster_name, cdh_version, public_key, instance_type, environment_name, workers_group_size=3, namespace_name=None,
        cloudera_manager_username='******', cloudera_manager_password='******'):
    """Create a cluster. Only name, CDH version and worker count are sent;
    the remaining parameters are accepted for interface compatibility."""
    payload = {
      'clusterName': cluster_name,
      'cdhVersion': cdh_version or 'CDH6.3',
      'workerCpuCores': 1,
      'workerMemoryInGib': 1,
      'workerReplicas': workers_group_size,
      'workerAutoResize': False
    }
    return self._root.post('createCluster', data=json.dumps(payload), contenttype="application/json")


  def list_clusters(self):
    """List clusters with a fixed 'Data Warehouse' instance type label."""
    response = self._root.post('listClusters', contenttype="application/json")
    for cluster in response['clusters']:
      self._decorate_cluster(cluster, 'Data Warehouse')
    return response


  def delete_cluster(self, cluster_id):
    """Delete a cluster by name; wrap the raw response under 'result'."""
    payload = json.dumps({'clusterName': cluster_id})
    return {
      'result': self._root.post('deleteCluster', data=payload, contenttype="application/json")
    }


  def describe_cluster(self, cluster_id):
    """Describe a cluster, normalizing the name and version fields."""
    payload = json.dumps({'clusterName': cluster_id})
    data = self._root.post('describeCluster', data=payload, contenttype="application/json")
    data['cluster']['clusterName'] = data['cluster']['name']
    data['cluster']['cdhVersion'] = 'Data Warehouse'
    return data


  def update_cluster(self, **params):
    """Forward arbitrary update parameters to the API."""
    return self._root.post('updateCluster', data=json.dumps(params), contenttype="application/json")
Exemple #39
0
class HistoryServerApi(object):
  """Client for the MapReduce Job History Server REST API."""

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(oozie_url, 'ws/%s/history' % _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    self._thread_local = threading.local()  # Per-thread impersonated user

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "HistoryServerApi at %s" % (self._url,)

  def _get_params(self):
    """Build request params, impersonating via doAs when needed."""
    params = {}
    if self.username != DEFAULT_USER.get():  # We impersonate if needed
      params['doAs'] = self.username
      if not self._security_enabled:
        params['user.name'] = DEFAULT_USER.get()
    return params

  def _get(self, path):
    # Every endpoint shares the same params and JSON Accept header.
    return self._root.get(path, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  @property
  def url(self):
    return self._url

  @property
  def user(self):
    return self.username  # Backward compatibility

  @property
  def username(self):
    try:
      return self._thread_local.user
    except AttributeError:
      return DEFAULT_USER.get()

  def setuser(self, user):
    previous = self.user
    self._thread_local.user = user
    return previous

  def job(self, user, job_id):
    return self._get('mapreduce/jobs/%s' % job_id)

  def counters(self, job_id):
    return self._get('mapreduce/jobs/%s/counters' % job_id)

  def conf(self, job_id):
    return self._get('mapreduce/jobs/%s/conf' % job_id)

  def job_attempts(self, job_id):
    return self._get('mapreduce/jobs/%s/jobattempts' % job_id)

  def tasks(self, job_id):
    return self._get('mapreduce/jobs/%s/tasks' % job_id)

  def task(self, job_id, task_id):
    return self._get('mapreduce/jobs/%s/tasks/%s' % (job_id, task_id))

  def task_attempts(self, job_id, task_id):
    return self._get('mapreduce/jobs/%s/tasks/%s/attempts' % (job_id, task_id))

  def task_counters(self, job_id, task_id):
    # Translate the YARN 'application_...' id prefix to 'job_...'.
    job_id = job_id.replace('application', 'job')
    return self._get('mapreduce/jobs/%s/tasks/%s/counters' % (job_id, task_id))

  def task_attempt(self, job_id, task_id, attempt_id):
    return self._get('mapreduce/jobs/%s/tasks/%s/attempts/%s' % (job_id, task_id, attempt_id))

  def task_attempt_counters(self, job_id, task_id, attempt_id):
    return self._get('mapreduce/jobs/%s/tasks/%s/attempts/%s/counters' % (job_id, task_id, attempt_id))
Exemple #40
0
class OozieApi(object):
    def __init__(self, oozie_url, user, security_enabled=False, api_version=API_VERSION, ssl_cert_ca_verify=True):
        self._url = posixpath.join(oozie_url, api_version)
        self._client = HttpClient(self._url, logger=LOG)

        if security_enabled:
            self._client.set_kerberos_auth()

        self._client.set_verify(ssl_cert_ca_verify)

        self._root = Resource(self._client)
        self._security_enabled = security_enabled
        # To store username info
        if hasattr(user, "username"):
            self.user = user.username
        else:
            self.user = user
        self.api_version = api_version

    def __str__(self):
        return "OozieApi at %s" % (self._url,)

    @property
    def url(self):
        return self._url

    @property
    def security_enabled(self):
        return self._security_enabled

    def _get_params(self):
        if self.security_enabled:
            return {"doAs": self.user, "timezone": TIME_ZONE.get()}
        return {"user.name": DEFAULT_USER, "doAs": self.user, "timezone": TIME_ZONE.get()}

    def _get_oozie_properties(self, properties=None):
        defaults = {"user.name": self.user}

        if properties is not None:
            defaults.update(properties)

        return defaults

    VALID_JOB_FILTERS = ("name", "user", "group", "status", "startcreatedtime", "text")
    VALID_LOG_FILTERS = set(("recent", "limit", "loglevel", "text"))

    def get_jobs(self, jobtype, offset=None, cnt=None, filters=None):
        """
    Get a list of Oozie jobs.

    Note that offset is 1-based.
    kwargs is used for filtering and may be one of VALID_FILTERS: name, user, group, status
    """
        params = self._get_params()
        if offset is not None:
            params["offset"] = str(offset)
        if cnt is not None:
            params["len"] = str(cnt)
        if filters is None:
            filters = []
        params["jobtype"] = jobtype

        filter_list = []
        for key, val in filters:
            if key not in OozieApi.VALID_JOB_FILTERS:
                raise ValueError('"%s" is not a valid filter for selecting jobs' % (key,))
            filter_list.append("%s=%s" % (key, val))
        params["filter"] = ";".join(filter_list)

        # Send the request
        resp = self._root.get("jobs", params)
        if jobtype == "wf":
            wf_list = WorkflowList(self, resp, filters=filters)
        elif jobtype == "coord":
            wf_list = CoordinatorList(self, resp, filters=filters)
        else:
            wf_list = BundleList(self, resp, filters=filters)
        return wf_list

    def get_workflows(self, offset=None, cnt=None, filters=None):
        return self.get_jobs("wf", offset, cnt, filters)

    def get_coordinators(self, offset=None, cnt=None, filters=None):
        return self.get_jobs("coord", offset, cnt, filters)

    def get_bundles(self, offset=None, cnt=None, filters=None):
        return self.get_jobs("bundle", offset, cnt, filters)

    # TODO: make get_job accept any jobid
    def get_job(self, jobid):
        """
    get_job(jobid) -> Workflow
    """
        params = self._get_params()
        resp = self._root.get("job/%s" % (jobid,), params)
        wf = Workflow(self, resp)
        return wf

    def get_coordinator(self, jobid, offset=None, cnt=None, filters=None):
        params = self._get_params()
        if offset is not None:
            params["offset"] = str(offset)
        if cnt is not None:
            params["len"] = str(cnt)
        if filters is None:
            filters = {}
        params.update({"order": "desc"})

        filter_list = []
        for key, val in filters:
            if key not in OozieApi.VALID_JOB_FILTERS:
                raise ValueError('"%s" is not a valid filter for selecting jobs' % (key,))
            filter_list.append("%s=%s" % (key, val))
        params["filter"] = ";".join(filter_list)

        resp = self._root.get("job/%s" % (jobid,), params)
        return Coordinator(self, resp)

    def get_bundle(self, jobid):
        params = self._get_params()
        resp = self._root.get("job/%s" % (jobid,), params)
        return Bundle(self, resp)

    def get_job_definition(self, jobid):
        """
    get_job_definition(jobid) -> Definition (xml string)
    """
        params = self._get_params()
        params["show"] = "definition"
        return self._root.get("job/%s" % (jobid,), params)

    def get_job_log(self, jobid, logfilter=None):
        """
    get_job_log(jobid) -> Log (xml string)
    """
        params = self._get_params()
        params["show"] = "log"

        filter_list = []
        if logfilter is None:
            logfilter = []
        for key, val in logfilter:
            if key not in OozieApi.VALID_LOG_FILTERS:
                raise ValueError('"%s" is not a valid filter for job logs' % (key,))
            filter_list.append("%s=%s" % (key, val))
        params["logfilter"] = ";".join(filter_list)
        return self._root.get("job/%s" % (jobid,), params)

    def get_job_status(self, jobid):
        params = self._get_params()
        params["show"] = "status"

        xml = self._root.get("job/%s" % (jobid,), params)
        return xml

    def get_action(self, action_id):
        if "C@" in action_id:
            Klass = CoordinatorAction
        elif "B@" in action_id:
            Klass = BundleAction
        else:
            Klass = WorkflowAction
        params = self._get_params()
        resp = self._root.get("job/%s" % (action_id,), params)
        return Klass(resp)

    def job_control(self, jobid, action, properties=None, parameters=None):
        """
    job_control(jobid, action) -> None
    Raise RestException on error.
    """
        if action not in (
            "start",
            "suspend",
            "resume",
            "kill",
            "rerun",
            "coord-rerun",
            "bundle-rerun",
            "change",
            "ignore",
            "update",
        ):
            msg = "Invalid oozie job action: %s" % (action,)
            LOG.error(msg)
            raise ValueError(msg)
        properties = self._get_oozie_properties(properties)
        params = self._get_params()
        params["action"] = action
        if parameters is not None:
            params.update(parameters)

        return self._root.put("job/%s" % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

    def submit_workflow(self, application_path, properties=None):
        """
    submit_workflow(application_path, properties=None) -> jobid

    Raise RestException on error.
    """
        defaults = {"oozie.wf.application.path": application_path, "user.name": self.user}

        if properties is not None:
            defaults.update(properties)
        properties = defaults

        return self.submit_job(properties)

    # Is name actually submit_coord?
    def submit_job(self, properties=None):
        """
    submit_job(properties=None, id=None) -> jobid

    Raise RestException on error.
    """
        defaults = {"user.name": self.user}

        if properties is not None:
            defaults.update(properties)

        properties = defaults

        params = self._get_params()
        resp = self._root.post("jobs", params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)
        return resp["id"]

    def dryrun(self, properties=None):
        defaults = {"user.name": self.user}

        if properties is not None:
            defaults.update(properties)

        properties = defaults

        params = self._get_params()
        params["action"] = "dryrun"
        return self._root.post("jobs", params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

    def rerun(self, jobid, properties=None, params=None):
        properties = self._get_oozie_properties(properties)
        if params is None:
            params = self._get_params()
        else:
            self._get_params().update(params)

        params["action"] = "rerun"

        return self._root.put("job/%s" % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

    def get_build_version(self):
        """
    get_build_version() -> Build version (dictionary)
    """
        params = self._get_params()
        resp = self._root.get("admin/build-version", params)
        return resp

    def get_instrumentation(self):
        params = self._get_params()
        resp = self._root.get("admin/instrumentation", params)
        return resp

    def get_metrics(self):
        params = self._get_params()
        resp = self._root.get("admin/metrics", params)
        return resp

    def get_configuration(self):
        """
    get_configuration() -> Oozie config (dictionary)
    """
        params = self._get_params()
        resp = self._root.get("admin/configuration", params)
        return resp

    def get_oozie_status(self):
        """
    get_oozie_status() -> Oozie status (dictionary)
    """
        params = self._get_params()
        resp = self._root.get("admin/status", params)
        return resp

    def get_oozie_slas(self, **kwargs):
        """
    filter=
      app_name=my-sla-app
      id=0000002-131206135002457-oozie-oozi-W
      nominal_start=2013-06-18T00:01Z
      nominal_end=2013-06-23T00:01Z
    """
        params = self._get_params()
        params["filter"] = ";".join(["%s=%s" % (key, val) for key, val in kwargs.iteritems()])
        resp = self._root.get("sla", params)
        return resp["slaSummaryList"]
Exemple #41
0
class HistoryServerApi(object):
    """Client for the MapReduce Job History Server REST API (no impersonation)."""

    def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
        self._url = posixpath.join(oozie_url, 'ws/%s/history' % _API_VERSION)
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)
        self._security_enabled = security_enabled

        if self._security_enabled:
            self._client.set_kerberos_auth()

        self._client.set_verify(ssl_cert_ca_verify)

    def __str__(self):
        return "HistoryServerApi at %s" % (self._url, )

    @property
    def url(self):
        return self._url

    def _get(self, path):
        # Every endpoint of this API is a plain JSON GET.
        return self._root.get(path, headers={'Accept': _JSON_CONTENT_TYPE})

    def job(self, user, job_id):
        return self._get('mapreduce/jobs/%s' % job_id)

    def counters(self, job_id):
        return self._get('mapreduce/jobs/%s/counters' % job_id)

    def conf(self, job_id):
        return self._get('mapreduce/jobs/%s/conf' % job_id)

    def job_attempts(self, job_id):
        return self._get('mapreduce/jobs/%s/jobattempts' % job_id)

    def tasks(self, job_id):
        return self._get('mapreduce/jobs/%s/tasks' % job_id)

    def task(self, job_id, task_id):
        return self._get('mapreduce/jobs/%s/tasks/%s' % (job_id, task_id))

    def task_attempts(self, job_id, task_id):
        return self._get('mapreduce/jobs/%s/tasks/%s/attempts' % (job_id, task_id))

    def task_counters(self, job_id, task_id):
        # Translate the YARN 'application_...' id prefix to 'job_...'.
        job_id = job_id.replace('application', 'job')
        return self._get('mapreduce/jobs/%s/tasks/%s/counters' % (job_id, task_id))

    def task_attempt(self, job_id, task_id, attempt_id):
        return self._get('mapreduce/jobs/%s/tasks/%s/attempts/%s' % (job_id, task_id, attempt_id))

    def task_attempt_counters(self, job_id, task_id, attempt_id):
        return self._get('mapreduce/jobs/%s/tasks/%s/attempts/%s/counters' % (job_id, task_id, attempt_id))
Exemple #42
0
class ResourceManagerApi(object):
  """Client for the YARN ResourceManager REST API (cluster info, apps, kill)."""

  def __init__(self, rm_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(rm_url, 'ws', _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    self._thread_local = threading.local() # To store user info

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def _get_params(self):
    """Build common request params, impersonating via doAs when needed."""
    params = {}

    if self.username != DEFAULT_USER.get(): # We impersonate if needed
      params['doAs'] = self.username
      if not self.security_enabled:
        params['user.name'] = DEFAULT_USER.get()

    return params

  def __str__(self):
    return "ResourceManagerApi at %s" % (self._url,)

  def setuser(self, user):
    """Set the per-thread user and return the previous one."""
    curr = self.user
    self._thread_local.user = user
    return curr

  @property
  def user(self):
    return self.username # Backward compatibility

  @property
  def username(self):
    try:
      return self._thread_local.user
    except AttributeError:
      return DEFAULT_USER.get()

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def cluster(self, **kwargs):
    """Return cluster-wide info from /cluster/info."""
    params = self._get_params()
    return self._execute(self._root.get, 'cluster/info', params=params, headers={'Accept': _JSON_CONTENT_TYPE})

  def apps(self, **kwargs):
    """List applications; kwargs are passed through as query params."""
    params = self._get_params()
    params.update(kwargs)
    return self._execute(self._root.get, 'cluster/apps', params=params, headers={'Accept': _JSON_CONTENT_TYPE})

  def app(self, app_id):
    """Return one application's info."""
    params = self._get_params()
    return self._execute(self._root.get, 'cluster/apps/%(app_id)s' % {'app_id': app_id}, params=params, headers={'Accept': _JSON_CONTENT_TYPE})

  def kill(self, app_id):
    """Kill an application by putting state=KILLED."""
    data = {'state': 'KILLED'}
    token = None

    # Tokens are managed within the kill method but should be moved out when not alpha anymore or we support submitting an app.
    # NOTE: the 'and False' deliberately disables the token path for now.
    if self.security_enabled and False:
      full_token = self.delegation_token()
      if 'token' not in full_token:
        raise PopupException(_('YARN did not return any token field.'), detail=smart_str(full_token))
      data['X-Hadoop-Delegation-Token'] = token = full_token.pop('token')
      LOG.debug('Received delegation token %s' % full_token)

    try:
      params = self._get_params()
      return self._execute(self._root.put, 'cluster/apps/%(app_id)s/state' % {'app_id': app_id}, params=params, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE)
    finally:
      if token:
        self.cancel_token(token)

  def delegation_token(self):
    """Request a new RM delegation token for the current user."""
    params = self._get_params()
    data = {'renewer': self.username}
    return self._execute(self._root.post, 'cluster/delegation-token', params=params, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE)

  def cancel_token(self, token):
    """Cancel a previously issued RM delegation token."""
    params = self._get_params()
    headers = {'Hadoop-YARN-RM-Delegation-Token': token}
    # Bug fix: the original message had no '%s' placeholder, so
    # "'...' % self.username" raised TypeError. Use lazy logger args.
    LOG.debug('Canceling delegation token of %s', self.username)
    return self._execute(self._root.delete, 'cluster/delegation-token', params=params, headers=headers)

  def _execute(self, function, *args, **kwargs):
    """Invoke a Resource method and detect RM standby 'redirect' responses."""
    response = function(*args, **kwargs)

    # YARN-2605: Yarn does not use proper HTTP redirects when the standby RM has
    # failed back to the master RM.
    if isinstance(response, str) and response.startswith('This is standby RM. Redirecting to the current active RM'):
      raise YarnFailoverOccurred(response)

    return response
Exemple #43
0
class SolrApi(object):
    """
  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """
    def __init__(self,
                 solr_url,
                 user,
                 security_enabled=SECURITY_ENABLED.get()
                 if search_enabled() else SECURITY_ENABLED.default,
                 ssl_cert_ca_verify=SSL_CERT_CA_VERIFY.get()):
        self._url = solr_url
        self._user = user
        self._client = HttpClient(self._url, logger=LOG)
        self.security_enabled = security_enabled

        if self.security_enabled:
            self._client.set_kerberos_auth()

        self._client.set_verify(ssl_cert_ca_verify)

        self._root = resource.Resource(self._client)

        # The Kerberos handshake requires two requests in order to authenticate,
        # but if our first request is a PUT/POST, it might flat-out reject the
        # first request if the body is too large. So, connect here in order to get
        # a cookie so future PUT/POSTs will be pre-authenticated.
        if self.security_enabled:
            self._root.invoke('HEAD', '/')

    def query(self, collection, query):
        solr_query = {}

        solr_query['collection'] = collection['name']

        if query.get('download'):
            solr_query['rows'] = 1000
            solr_query['start'] = 0
        else:
            solr_query['rows'] = int(collection['template']['rows'] or 10)
            solr_query['start'] = int(query['start'])

        solr_query['rows'] = min(solr_query['rows'], 1000)
        solr_query['start'] = min(solr_query['start'], 10000)

        params = self._get_params() + (
            ('q', self._get_q(query)),
            ('wt', 'json'),
            ('rows', solr_query['rows']),
            ('start', solr_query['start']),
        )

        if any(collection['facets']):
            params += (
                ('facet', 'true'),
                ('facet.mincount', 0),
                ('facet.limit', 10),
            )
            json_facets = {}

            timeFilter = self._get_range_borders(collection, query)

            for facet in collection['facets']:
                if facet['type'] == 'query':
                    params += (('facet.query', '%s' % facet['field']), )
                elif facet['type'] == 'range' or facet['type'] == 'range-up':
                    keys = {
                        'id': '%(id)s' % facet,
                        'field': facet['field'],
                        'key': '%(field)s-%(id)s' % facet,
                        'start': facet['properties']['start'],
                        'end': facet['properties']['end'],
                        'gap': facet['properties']['gap'],
                        'mincount': int(facet['properties']['mincount'])
                    }

                    if timeFilter and timeFilter['time_field'] == facet[
                            'field'] and (
                                facet['id']
                                not in timeFilter['time_filter_overrides']
                                or facet['widgetType'] != 'histogram-widget'):
                        keys.update(
                            self._get_time_filter_query(timeFilter, facet))

                    params += ((
                        'facet.range',
                        '{!key=%(key)s ex=%(id)s f.%(field)s.facet.range.start=%(start)s f.%(field)s.facet.range.end=%(end)s f.%(field)s.facet.range.gap=%(gap)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s'
                        % keys), )
                elif facet['type'] == 'field':
                    keys = {
                        'id':
                        '%(id)s' % facet,
                        'field':
                        facet['field'],
                        'key':
                        '%(field)s-%(id)s' % facet,
                        'limit':
                        int(facet['properties'].get('limit', 10)) +
                        (1 if facet['widgetType'] == 'facet-widget' else 0),
                        'mincount':
                        int(facet['properties']['mincount'])
                    }

                    params += ((
                        'facet.field',
                        '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s'
                        % keys), )
                elif facet['type'] == 'nested':
                    _f = {
                        'field':
                        facet['field'],
                        'limit':
                        int(facet['properties'].get('limit', 10)) +
                        (1
                         if facet['widgetType'] == 'text-facet-widget' else 0),
                        'mincount':
                        int(facet['properties']['mincount']),
                        'sort': {
                            'count': facet['properties']['sort']
                        },
                    }
                    print facet

                    if facet['properties']['domain'].get(
                            'blockParent'
                    ) or facet['properties']['domain'].get('blockChildren'):
                        _f['domain'] = {}
                        if facet['properties']['domain'].get('blockParent'):
                            _f['domain']['blockParent'] = ' OR '.join(
                                facet['properties']['domain']['blockParent'])
                        if facet['properties']['domain'].get('blockChildren'):
                            _f['domain']['blockChildren'] = ' OR '.join(
                                facet['properties']['domain']['blockChildren'])

                    if 'start' in facet['properties'] and not facet[
                            'properties'].get('type') == 'field':
                        _f.update({
                            'type': 'range',
                            'start': facet['properties']['start'],
                            'end': facet['properties']['end'],
                            'gap': facet['properties']['gap'],
                        })
                        if timeFilter and timeFilter['time_field'] == facet[
                                'field'] and (
                                    facet['id']
                                    not in timeFilter['time_filter_overrides']
                                    or facet['widgetType'] != 'bucket-widget'):
                            _f.update(
                                self._get_time_filter_query(timeFilter, facet))
                    else:
                        _f.update({
                            'type': 'terms',
                            'field': facet['field'],
                            'excludeTags': facet['id'],
                            'offset': 0,
                            'numBuckets': True,
                            'allBuckets': True,
                            'prefix': ''
                        })
                        if facet['properties']['canRange'] and not facet[
                                'properties']['isDate']:
                            del _f['mincount']  # Numeric fields do not support

                    if facet['properties']['facets']:
                        self._n_facet_dimension(facet, _f,
                                                facet['properties']['facets'],
                                                1)
                        if facet['widgetType'] == 'text-facet-widget':
                            _fname = _f['facet'].keys()[0]
                            _f['sort'] = {_fname: facet['properties']['sort']}
                            # domain = '-d2:NaN' # Solr 6.4

                    json_facets[facet['id']] = _f
                elif facet['type'] == 'function':
                    json_facets[facet['id']] = self._get_aggregate_function(
                        facet)
                    json_facets['processEmpty'] = True
                elif facet['type'] == 'pivot':
                    if facet['properties']['facets'] or facet[
                            'widgetType'] == 'map-widget':
                        fields = facet['field']
                        fields_limits = []
                        for f in facet['properties']['facets']:
                            fields_limits.append('f.%s.facet.limit=%s' %
                                                 (f['field'], f['limit']))
                            fields_limits.append('f.%s.facet.mincount=%s' %
                                                 (f['field'], f['mincount']))
                            fields += ',' + f['field']
                        keys = {
                            'id': '%(id)s' % facet,
                            'key': '%(field)s-%(id)s' % facet,
                            'field': facet['field'],
                            'fields': fields,
                            'limit': int(facet['properties'].get('limit', 10)),
                            'mincount': int(facet['properties']['mincount']),
                            'fields_limits': ' '.join(fields_limits)
                        }
                        params += ((
                            'facet.pivot',
                            '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s %(fields_limits)s}%(fields)s'
                            % keys), )

            if json_facets:
                params += (('json.facet', json.dumps(json_facets)), )

        params += self._get_fq(collection, query)

        if collection['template']['fieldsSelected'] and collection['template'][
                'isGridLayout']:
            fields = set(
                collection['template']['fieldsSelected'] +
                [collection['idField']] if collection['idField'] else [])
            # Add field if needed
            if collection['template']['leafletmap'].get('latitudeField'):
                fields.add(
                    collection['template']['leafletmap']['latitudeField'])
            if collection['template']['leafletmap'].get('longitudeField'):
                fields.add(
                    collection['template']['leafletmap']['longitudeField'])
            if collection['template']['leafletmap'].get('labelField'):
                fields.add(collection['template']['leafletmap']['labelField'])
            fl = urllib.unquote(utf_quoter(','.join(list(fields))))
        else:
            fl = '*'

        nested_fields = self._get_nested_fields(collection)
        if nested_fields:
            fl += urllib.unquote(
                utf_quoter(',[child parentFilter="%s"]' %
                           ' OR '.join(nested_fields)))

        params += (('fl', fl), )

        params += (
            ('hl', 'true'),
            ('hl.fl', '*'),
            ('hl.snippets', 5),
            ('hl.fragsize', 1000),
        )

        if collection['template']['fieldsSelected']:
            fields = []
            for field in collection['template']['fieldsSelected']:
                attribute_field = filter(
                    lambda attribute: field == attribute['name'],
                    collection['template']['fieldsAttributes'])
                if attribute_field:
                    if attribute_field[0]['sort']['direction']:
                        fields.append(
                            '%s %s' %
                            (field, attribute_field[0]['sort']['direction']))
            if fields:
                params += (('sort', ','.join(fields)), )

        response = self._root.get('%(collection)s/select' % solr_query, params)
        return self._get_json(response)

    def _n_facet_dimension(self, widget, _f, facets, dim):
        """Recursively build one dimension of a nested JSON facet.

        Mutates `_f` in place. A 'count' aggregate at the head of `facets`
        becomes a terms bucket keyed 'dim_<dim>:<field>'; any other aggregate
        becomes one or more 'agg_<dim>_<i>:<fn>' function entries. Recursion
        consumes the remaining `facets` as deeper dimensions.

        :param widget: facet/widget definition dict (widgetType is read).
        :param _f: JSON facet dict being built; modified in place.
        :param facets: list of dimension/aggregate dicts, first one is current.
        :param dim: 1-based dimension index used in generated facet keys.
        """
        facet = facets[0]
        # Bucket key encodes the dimension index and the field it groups on.
        f_name = 'dim_%02d:%s' % (dim, facet['field'])

        if facet['aggregate']['function'] == 'count':
            # Ensure a 'facet' sub-dict exists, then descend into it.
            if 'facet' not in _f:
                _f['facet'] = {f_name: {}}
            else:
                _f['facet'][f_name] = {}
            _f = _f['facet']

            # Terms bucket for this dimension; numBuckets/allBuckets are
            # requested so the caller can paginate and show totals.
            _f[f_name] = {
                'type': 'terms',
                'field': '%(field)s' % facet,
                'limit': int(facet.get('limit', 10)),
                'mincount': int(facet['mincount']),
                'numBuckets': True,
                'allBuckets': True,
                'prefix': ''
            }
            # tree2 widgets need the last aggregate surfaced as a sibling count.
            if widget['widgetType'] == 'tree2-widget' and facets[-1][
                    'aggregate']['function'] != 'count':
                _f['subcount'] = self._get_aggregate_function(facets[-1])

            if len(facets) > 1:  # Get n+1 dimension
                if facets[1]['aggregate']['function'] == 'count':
                    self._n_facet_dimension(widget, _f[f_name], facets[1:],
                                            dim + 1)
                else:
                    self._n_facet_dimension(widget, _f[f_name], facets[1:],
                                            dim)
        else:
            # Non-count head: emit function aggregates for this dimension until
            # the next 'count' facet starts a deeper dimension.
            agg_function = self._get_aggregate_function(facet)
            _f['facet'] = {
                'agg_%02d_00:%s' % (dim, agg_function): agg_function
            }
            for i, _f_agg in enumerate(facets[1:], 1):
                if _f_agg['aggregate']['function'] != 'count':
                    agg_function = self._get_aggregate_function(_f_agg)
                    _f['facet']['agg_%02d_%02d:%s' %
                                (dim, i, agg_function)] = agg_function
                else:
                    self._n_facet_dimension(widget, _f, facets[i:],
                                            dim + 1)  # Get n+1 dimension
                    break

    def suggest(self, collection, query):
        """Run the Solr Suggester for query['q'] against `collection`.

        Builds the suggester index on the fly (suggest.build=true) and honors
        an optional query['dictionary'] naming a specific suggester.

        :param collection: collection/core name used in the request path.
        :param query: dict with key 'q' and optional key 'dictionary'.
        :returns: parsed JSON suggest response.
        :raises PopupException: wrapping any RestException from Solr.
        """
        try:
            params = self._get_params() + (
                ('suggest', 'true'),
                ('suggest.build', 'true'),
                ('suggest.q', query['q']),
                ('wt', 'json'),
            )
            if query.get('dictionary'):
                params += (('suggest.dictionary', query['dictionary']), )
            response = self._root.get('%s/suggest' % collection, params)
            return self._get_json(response)
        # 'except X as e' replaces the Python-2-only 'except X, e' form; it is
        # valid on Python 2.6+ and required on Python 3.
        except RestException as e:
            raise PopupException(e, title=_('Error while accessing Solr'))
Exemple #44
0
class SolrApi(object):
  """
  REST client for Solr used by the dashboard/search app: builds facet, filter,
  field-list, highlighting and sort parameters from dashboard definitions.

  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """
  def __init__(self, solr_url=None, user=None, security_enabled=False, ssl_cert_ca_verify=SSL_CERT_CA_VERIFY.get()):
    """Create the HTTP client; falls back to the configured SOLR_URL."""
    if solr_url is None and hasattr(SOLR_URL, 'get'):
      solr_url = SOLR_URL.get()

    if solr_url:
      self._url = solr_url
      self._user = user
      self._client = HttpClient(self._url, logger=LOG)
      self.security_enabled = security_enabled or SECURITY_ENABLED.get()

      if self.security_enabled:
        self._client.set_kerberos_auth()

      self._client.set_verify(ssl_cert_ca_verify)

      self._root = resource.Resource(self._client)

      # The Kerberos handshake requires two requests in order to authenticate,
      # but if our first request is a PUT/POST, it might flat-out reject the
      # first request if the body is too large. So, connect here in order to get
      # a cookie so future PUT/POSTs will be pre-authenticated.
      if self.security_enabled:
        self._root.invoke('HEAD', '/')


  def query(self, collection, query):
    """Run a dashboard search against `collection` and return parsed JSON.

    Translates each facet definition (query / range / field / nested /
    function / pivot) into Solr request parameters, adds filter queries,
    the field list, highlighting and sorting, then issues /select.
    """
    solr_query = {}

    solr_query['collection'] = collection['name']

    if query.get('download'):
      solr_query['rows'] = 1000
      solr_query['start'] = 0
    else:
      solr_query['rows'] = int(collection['template']['rows'] or 10)
      solr_query['start'] = int(query['start'])

    # Hard caps: at most 1000 rows per page and no paging past offset 10000.
    solr_query['rows'] = min(solr_query['rows'], 1000)
    solr_query['start'] = min(solr_query['start'], 10000)

    params = self._get_params() + (
        ('q', self._get_q(query)),
        ('wt', 'json'),
        ('rows', solr_query['rows']),
        ('start', solr_query['start']),
    )

    if any(collection['facets']):
      params += (
        ('facet', 'true'),
        ('facet.mincount', 0),
        ('facet.limit', 10),
      )
      # Facets expressed via the JSON Facet API are accumulated here and
      # serialized once at the end.
      json_facets = {}

      timeFilter = self._get_range_borders(collection, query)

      for facet in collection['facets']:
        if facet['type'] == 'query':
          params += (('facet.query', '%s' % facet['field']),)
        elif facet['type'] == 'range' or facet['type'] == 'range-up':
          keys = {
              'id': '%(id)s' % facet,
              'field': facet['field'],
              'key': '%(field)s-%(id)s' % facet,
              'start': facet['properties']['start'],
              'end': facet['properties']['end'],
              'gap': facet['properties']['gap'],
              'mincount': int(facet['properties']['mincount'])
          }

          # Override the facet's own borders with the global time filter
          # unless this facet explicitly opted out.
          if timeFilter and timeFilter['time_field'] == facet['field'] and (facet['id'] not in timeFilter['time_filter_overrides'] or facet['widgetType'] != 'histogram-widget'):
            keys.update(self._get_time_filter_query(timeFilter, facet))

          params += (
             ('facet.range', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.range.start=%(start)s f.%(field)s.facet.range.end=%(end)s f.%(field)s.facet.range.gap=%(gap)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys),
          )
        elif facet['type'] == 'field':
          keys = {
              'id': '%(id)s' % facet,
              'field': facet['field'],
              'key': '%(field)s-%(id)s' % facet,
              # facet-widget asks for one extra value to detect "has more".
              'limit': int(facet['properties'].get('limit', 10)) + (1 if facet['widgetType'] == 'facet-widget' else 0),
              'mincount': int(facet['properties']['mincount'])
          }

          params += (
              ('facet.field', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys),
          )
        elif facet['type'] == 'nested':
          # Multi-dimensional facet: built recursively into the JSON Facet API.
          _f = {}
          if facet['properties']['facets']:
            self._n_facet_dimension(facet, _f, facet['properties']['facets'], 1, timeFilter)

          if facet['properties'].get('domain'):
            if facet['properties']['domain'].get('blockParent') or facet['properties']['domain'].get('blockChildren'):
              _f['domain'] = {}
              if facet['properties']['domain'].get('blockParent'):
                _f['domain']['blockParent'] = ' OR '.join(facet['properties']['domain']['blockParent'])
              if facet['properties']['domain'].get('blockChildren'):
                _f['domain']['blockChildren'] = ' OR '.join(facet['properties']['domain']['blockChildren'])

          if _f:
            # Sort by count unless a later aggregate specifies its own order.
            sort = {'count': facet['properties']['facets'][0]['sort']}
            for i, agg in enumerate(self._get_dimension_aggregates(facet['properties']['facets'][1:])):
              if agg['sort'] != 'default':
                agg_function = self._get_aggregate_function(agg)
                sort = {'agg_%02d_%02d:%s' % (1, i, agg_function): agg['sort']}

            if sort.get('count') == 'default':
              sort['count'] = 'desc'

            dim_key = [key for key in _f['facet'].keys() if 'dim' in key][0]
            _f['facet'][dim_key].update({
                  'excludeTags': facet['id'],
                  'offset': 0,
                  'numBuckets': True,
                  'allBuckets': True,
                  'sort': sort
                  #'prefix': '' # Forbidden on numeric fields
              })
            json_facets[facet['id']] = _f['facet'][dim_key]
        elif facet['type'] == 'function':
          if facet['properties']['facets']:
            json_facets[facet['id']] = self._get_aggregate_function(facet['properties']['facets'][0])
            if facet['properties']['compare']['is_enabled']:
              # TODO: global compare override
              unit = re.split('\d+', facet['properties']['compare']['gap'])[1]
              # Wrap the aggregate in a range facet covering the current and
              # previous period so both can be compared.
              json_facets[facet['id']] = {
                'type': 'range',
                'field': collection['timeFilter'].get('field'),
                'start': 'NOW/%s-%s-%s' % (unit, facet['properties']['compare']['gap'], facet['properties']['compare']['gap']),
                'end': 'NOW/%s' % unit,
                'gap': '+%(gap)s' % facet['properties']['compare'],
                'facet': {facet['id']: json_facets[facet['id']]}
              }
            if facet['properties']['filter']['is_enabled']:
              json_facets[facet['id']] = {
                'type': 'query',
                'q': facet['properties']['filter']['query'] or EMPTY_QUERY.get(),
                'facet': {facet['id']: json_facets[facet['id']]}
              }
            json_facets['processEmpty'] = True
        elif facet['type'] == 'pivot':
          if facet['properties']['facets'] or facet['widgetType'] == 'map-widget':
            fields = facet['field']
            fields_limits = []
            # Per-field limit/mincount overrides for each pivot level.
            for f in facet['properties']['facets']:
              fields_limits.append('f.%s.facet.limit=%s' % (f['field'], f['limit']))
              fields_limits.append('f.%s.facet.mincount=%s' % (f['field'], f['mincount']))
              fields += ',' + f['field']
            keys = {
                'id': '%(id)s' % facet,
                'key': '%(field)s-%(id)s' % facet,
                'field': facet['field'],
                'fields': fields,
                'limit': int(facet['properties'].get('limit', 10)),
                'mincount': int(facet['properties']['mincount']),
                'fields_limits': ' '.join(fields_limits)
            }
            params += (
                ('facet.pivot', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s %(fields_limits)s}%(fields)s' % keys),
            )

      if json_facets:
        params += (
            ('json.facet', json.dumps(json_facets)),
        )

    params += self._get_fq(collection, query)

    fl = urllib.unquote(utf_quoter(','.join(Collection2.get_field_list(collection))))

    nested_fields = self._get_nested_fields(collection)
    if nested_fields:
      fl += urllib.unquote(utf_quoter(',[child parentFilter="%s"]' % ' OR '.join(nested_fields)))

    # NOTE(review): fl is a str at this point, so `fl != ['*']` is always
    # True — this probably meant `fl != '*'`; confirm intended behavior.
    if collection['template']['moreLikeThis'] and fl != ['*']: # Potential conflict with nested documents
      id_field = collection.get('idField', 'id')
      params += (
        ('mlt', 'true'),
        ('mlt.fl', fl.replace(',%s' % id_field, '')),
        ('mlt.mintf', 1),
        ('mlt.mindf', 1),
        ('mlt.maxdf', 50),
        ('mlt.maxntp', 1000),
        ('mlt.count', 10),
        #('mlt.minwl', 1),
        #('mlt.maxwl', 1),
      )
      fl = '*'

    params += (('fl', fl),)

    params += (
      ('hl', 'true'),
      ('hl.fl', '*'),
      ('hl.snippets', 5),
      ('hl.fragsize', 1000),
    )

    # Translate per-field sort directions from the template into a sort param.
    if collection['template']['fieldsSelected']:
      fields = []
      for field in collection['template']['fieldsSelected']:
        attribute_field = filter(lambda attribute: field == attribute['name'], collection['template']['fieldsAttributes'])
        if attribute_field:
          if attribute_field[0]['sort']['direction']:
            fields.append('%s %s' % (field, attribute_field[0]['sort']['direction']))
      if fields:
        params += (
          ('sort', ','.join(fields)),
        )

    response = self._root.get('%(collection)s/select' % solr_query, params)
    return self._get_json(response)


  def _n_facet_dimension(self, widget, _f, facets, dim, timeFilter):
    """Recursively build one dimension of a nested JSON facet.

    Mutates `_f` in place: a 'count' aggregate at the head of `facets`
    becomes a terms (or range) bucket keyed 'dim_<dim>:<field>'; other
    aggregates become 'agg_<dim>_<i>:<fn>' function entries. Recurses into
    the remaining `facets` as deeper dimensions, threading `timeFilter`
    through so time-based range facets pick up the global borders.
    """
    facet = facets[0]
    f_name = 'dim_%02d:%s' % (dim, facet['field'])

    if facet['aggregate']['function'] == 'count':
      if 'facet' not in _f:
        _f['facet'] = {f_name: {}}
      else:
        _f['facet'][f_name] = {}
      _f = _f['facet']

      # Sort by count unless a later aggregate specifies its own order.
      sort = {'count': facet['sort']}
      for i, agg in enumerate(self._get_dimension_aggregates(facets)):
        if agg['sort'] != 'default':
          agg_function = self._get_aggregate_function(agg)
          sort = {'agg_%02d_%02d:%s' % (dim, i, agg_function): agg['sort']}
      if sort.get('count') == 'default':
        sort['count'] = 'desc'

      _f[f_name] = {
          'type': 'terms',
          'field': '%(field)s' % facet,
          'limit': int(facet.get('limit', 10)),
          'mincount': int(facet['mincount']),
          'numBuckets': True,
          'allBuckets': True,
          'sort': sort,
          'missing': facet.get('missing', False)
          #'prefix': '' # Forbidden on numeric fields
      }

      # A facet carrying start/end/gap is really a range bucket.
      if 'start' in facet and not facet.get('type') == 'field':
        _f[f_name].update({
            'type': 'range',
            'start': facet['start'],
            'end': facet['end'],
            'gap': facet['gap']
        })

        # Only on dim 1 currently
        if timeFilter and timeFilter['time_field'] == facet['field'] and (widget['id'] not in timeFilter['time_filter_overrides']): # or facet['widgetType'] != 'bucket-widget'):
          facet['widgetType'] = widget['widgetType']
          _f[f_name].update(self._get_time_filter_query(timeFilter, facet))

      if widget['widgetType'] == 'tree2-widget' and facets[-1]['aggregate']['function'] != 'count':
        _f['subcount'] = self._get_aggregate_function(facets[-1])

      if len(facets) > 1: # Get n+1 dimension
        if facets[1]['aggregate']['function'] == 'count':
          self._n_facet_dimension(widget, _f[f_name], facets[1:], dim + 1, timeFilter)
        else:
          self._n_facet_dimension(widget, _f[f_name], facets[1:], dim, timeFilter)
    else:
      # Non-count head: emit function aggregates until the next 'count'
      # facet starts a deeper dimension.
      agg_function = self._get_aggregate_function(facet)
      _f['facet'] = {
          'agg_%02d_00:%s' % (dim, agg_function): agg_function
      }
      for i, _f_agg in enumerate(facets[1:], 1):
        if _f_agg['aggregate']['function'] != 'count':
          agg_function = self._get_aggregate_function(_f_agg)
          _f['facet']['agg_%02d_%02d:%s' % (dim, i, agg_function)] = agg_function
        else:
          self._n_facet_dimension(widget, _f, facets[i:], dim + 1, timeFilter) # Get n+1 dimension
          break


  def select(self, collection, query=None, rows=100, start=0):
    """Plain /select query against `collection`; returns parsed JSON."""
    if query is None:
      query = EMPTY_QUERY.get()

    params = self._get_params() + (
        ('q', query),
        ('wt', 'json'),
        ('rows', rows),
        ('start', start),
    )

    response = self._root.get('%s/select' % collection, params)
    return self._get_json(response)


  def suggest(self, collection, query):
    """Run the Solr Suggester for query['q']; wraps errors in PopupException."""
    try:
      params = self._get_params() + (
          ('suggest', 'true'),
          ('suggest.build', 'true'),
          ('suggest.q', query['q']),
          ('wt', 'json'),
      )
      if query.get('dictionary'):
        params += (
            ('suggest.dictionary', query['dictionary']),
        )
      response = self._root.get('%s/suggest' % collection, params)
      return self._get_json(response)
    except RestException, e:
      raise PopupException(e, title=_('Error while accessing Solr'))
Exemple #45
0
class ResourceManagerApi(object):
    """REST client for the YARN ResourceManager web services API.

    Wraps the cluster-level endpoints (info, applications, app state,
    delegation tokens) under ws/<version>, optionally authenticating
    via Kerberos and impersonating the end user via doAs.
    """

    def __init__(self,
                 username,
                 rm_url,
                 security_enabled=False,
                 ssl_cert_ca_verify=False):
        self._username = username
        self._url = posixpath.join(rm_url, 'ws', _API_VERSION)
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)
        self._security_enabled = security_enabled

        if self._security_enabled:
            self._client.set_kerberos_auth()

        self._client.set_verify(ssl_cert_ca_verify)

    def _get_params(self):
        """Common query parameters used by every request.

        Impersonates the end user (doAs) when it differs from the default
        user; without Kerberos, user.name identifies the caller.
        """
        params = {}

        if self._username != DEFAULT_USER.get():  # We impersonate if needed
            params['doAs'] = self._username
            if not self.security_enabled:
                params['user.name'] = DEFAULT_USER.get()

        return params

    def __str__(self):
        return "ResourceManagerApi at %s" % (self._url, )

    @property
    def url(self):
        return self._url

    @property
    def security_enabled(self):
        return self._security_enabled

    def cluster(self, **kwargs):
        """Return cluster/info. Extra kwargs are currently ignored."""
        params = self._get_params()
        return self._execute(self._root.get,
                             'cluster/info',
                             params=params,
                             headers={'Accept': _JSON_CONTENT_TYPE})

    def apps(self, **kwargs):
        """List cluster applications; kwargs become request parameters."""
        params = self._get_params()
        params.update(kwargs)
        return self._execute(self._root.get,
                             'cluster/apps',
                             params=params,
                             headers={'Accept': _JSON_CONTENT_TYPE})

    def app(self, app_id):
        """Return the description of a single application."""
        params = self._get_params()
        return self._execute(self._root.get,
                             'cluster/apps/%(app_id)s' % {'app_id': app_id},
                             params=params,
                             headers={'Accept': _JSON_CONTENT_TYPE})

    def kill(self, app_id):
        """Kill an application by setting its state to KILLED."""
        data = {'state': 'KILLED'}
        token = None

        # Tokens are managed within the kill method but should be moved out when not alpha anymore or we support submitting an app.
        # NOTE: the `and False` deliberately disables the token path for now.
        if self.security_enabled and False:
            full_token = self.delegation_token()
            if 'token' not in full_token:
                raise PopupException(_('YARN did not return any token field.'),
                                     detail=smart_str(full_token))
            data['X-Hadoop-Delegation-Token'] = token = full_token.pop('token')
            LOG.debug('Received delegation token %s' % full_token)

        try:
            params = self._get_params()
            return self._execute(self._root.put,
                                 'cluster/apps/%(app_id)s/state' %
                                 {'app_id': app_id},
                                 params=params,
                                 data=json.dumps(data),
                                 contenttype=_JSON_CONTENT_TYPE)
        finally:
            # Always cancel a token we obtained, even if the kill failed.
            if token:
                self.cancel_token(token)

    def delegation_token(self):
        """Request a new RM delegation token renewable by this user."""
        params = self._get_params()
        data = {'renewer': self._username}
        return self._execute(self._root.post,
                             'cluster/delegation-token',
                             params=params,
                             data=json.dumps(data),
                             contenttype=_JSON_CONTENT_TYPE)

    def cancel_token(self, token):
        """Cancel a previously obtained RM delegation token."""
        params = self._get_params()
        headers = {'Hadoop-YARN-RM-Delegation-Token': token}
        # Fixed: the original format string had no %s placeholder, so the
        # '%' operator raised TypeError at runtime.
        LOG.debug('Canceling delegation token of %s' % self._username)
        return self._execute(self._root.delete,
                             'cluster/delegation-token',
                             params=params,
                             headers=headers)

    def _execute(self, function, *args, **kwargs):
        """Invoke `function` and detect a standby-RM plain-text response.

        :raises YarnFailoverOccurred: when the standby RM answers instead of
            issuing a proper HTTP redirect (YARN-2605).
        """
        response = function(*args, **kwargs)

        # YARN-2605: Yarn does not use proper HTTP redirects when the standby RM has
        # failed back to the master RM.
        if isinstance(response, str) and response.startswith(
                'This is standby RM. Redirecting to the current active RM'):
            raise YarnFailoverOccurred(response)

        return response
Exemple #46
0
class MapreduceApi(object):
  """Client for the MapReduce AM web services, reached through the RM proxy.

  Each call derives the YARN application id from the MapReduce job id
  ('job_...' -> 'application_...') and hits
  /proxy/<app_id>/ws/<version>/mapreduce/... returning parsed JSON.
  """

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    # NOTE(review): the parameter is named oozie_url but is used as the base
    # URL for the RM proxy endpoint — confirm naming against callers.
    self._url = posixpath.join(oozie_url, 'proxy')
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "MapreduceApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  def job(self, user, job_id):
    # `user` is accepted but not used by this method.
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, headers={'Accept': _JSON_CONTENT_TYPE})

  def counters(self, job_id):
    app_id = job_id.replace('job', 'application')
    response = self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/counters' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, headers={'Accept': _JSON_CONTENT_TYPE})
    # If it hits the job history server, it will return HTML.
    # Simply return None in this case because there isn't much data there.
    if isinstance(response, basestring):
      return None
    else:
      return response

  def tasks(self, job_id):
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, headers={'Accept': _JSON_CONTENT_TYPE})

  def job_attempts(self, job_id):
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/jobattempts' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, headers={'Accept': _JSON_CONTENT_TYPE})

  def conf(self, job_id):
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/conf' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, headers={'Accept': _JSON_CONTENT_TYPE})

  def task(self, job_id, task_id):
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s' % {'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'version': _API_VERSION}, headers={'Accept': _JSON_CONTENT_TYPE})

  def task_counters(self, job_id, task_id):
    app_id = job_id.replace('job', 'application')
    # Normalize in case an application id was passed instead of a job id.
    job_id = job_id.replace('application', 'job')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/counters' % {'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'version': _API_VERSION}, headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempts(self, job_id, task_id):
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts' % {'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'version': _API_VERSION}, headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempt(self, job_id, task_id, attempt_id):
    app_id = job_id.replace('job', 'application')
    # Normalize in case an application id was passed instead of a job id.
    job_id = job_id.replace('application', 'job')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s' % {'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id, 'version': _API_VERSION}, headers={'Accept': _JSON_CONTENT_TYPE})

  def kill(self, job_id):
    app_id = job_id.replace('job', 'application')
    get_resource_manager().kill(app_id) # We need to call the RM
Exemple #47
0
class SolrApi(object):
  """
  Client for the Solr REST API, building dashboard queries (q, fq, facets).

  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """
  def __init__(self, solr_url, user, security_enabled=SECURITY_ENABLED.get(), ssl_cert_ca_verify=SSL_CERT_CA_VERIFY.get()):
    self._url = solr_url
    self._user = user
    self._client = HttpClient(self._url, logger=LOG)
    self.security_enabled = security_enabled

    if self.security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

    self._root = resource.Resource(self._client)

    # The Kerberos handshake requires two requests in order to authenticate,
    # but if our first request is a PUT/POST, it might flat-out reject the
    # first request if the body is too large. So, connect here in order to get
    # a cookie so future PUT/POSTs will be pre-authenticated.
    if self.security_enabled:
      self._root.invoke('HEAD', '/')

  def _get_params(self):
    """Common request parameters: impersonate the end user (doAs)."""
    if self.security_enabled:
      return (('doAs', self._user),)
    # Without Kerberos, also authenticate as the default user.
    return (('user.name', DEFAULT_USER), ('doAs', self._user),)

  def _get_q(self, query):
    """Build the main 'q' parameter by OR-ing all sub-queries together."""
    q_template = '(%s)' if len(query['qs']) >= 2 else '%s'
    return 'OR'.join([q_template % (q['q'] or EMPTY_QUERY.get()) for q in query['qs']]).encode('utf-8')

  def _get_aggregate_function(self, facet):
    """Translate a facet aggregate name into a Solr JSON-facet function string."""
    props = {
        'field': facet['field'],
        'aggregate': facet['properties']['aggregate'] if 'properties' in facet else facet['aggregate']
    }

    # Solr has no 'median' function: express it as the 50th percentile.
    if props['aggregate'] == 'median':
      return 'percentile(%(field)s,50)' % props
    else:
      return '%(aggregate)s(%(field)s)' % props

  def _get_range_borders(self, collection, query):
    """
    Compute the time-range properties (field, from, to, gap) of the collection's
    time filter, for use in fq and range facets. Returns an empty dict when no
    time filter applies.
    """
    props = {}
    # Per-window gap presets: widget type -> bucket size, tuned so histogram-like
    # widgets get ~100 slots and facet widgets ~10.
    GAPS = {
        '5MINUTES': {
            'histogram-widget': {'coeff': '+3', 'unit': 'SECONDS'}, # ~100 slots
            'bucket-widget': {'coeff': '+3', 'unit': 'SECONDS'}, # ~100 slots
            'bar-widget': {'coeff': '+3', 'unit': 'SECONDS'}, # ~100 slots
            'facet-widget': {'coeff': '+1', 'unit': 'MINUTES'}, # ~10 slots
        },
        '30MINUTES': {
            'histogram-widget': {'coeff': '+20', 'unit': 'SECONDS'},
            'bucket-widget': {'coeff': '+20', 'unit': 'SECONDS'},
            'bar-widget': {'coeff': '+20', 'unit': 'SECONDS'},
            'facet-widget': {'coeff': '+5', 'unit': 'MINUTES'},
        },
        '1HOURS': {
            'histogram-widget': {'coeff': '+30', 'unit': 'SECONDS'},
            'bucket-widget': {'coeff': '+30', 'unit': 'SECONDS'},
            'bar-widget': {'coeff': '+30', 'unit': 'SECONDS'},
            'facet-widget': {'coeff': '+10', 'unit': 'MINUTES'},
        },
        '12HOURS': {
            'histogram-widget': {'coeff': '+7', 'unit': 'MINUTES'},
            'bucket-widget': {'coeff': '+7', 'unit': 'MINUTES'},
            'bar-widget': {'coeff': '+7', 'unit': 'MINUTES'},
            'facet-widget': {'coeff': '+1', 'unit': 'HOURS'},
        },
        '1DAYS': {
            'histogram-widget': {'coeff': '+15', 'unit': 'MINUTES'},
            'bucket-widget': {'coeff': '+15', 'unit': 'MINUTES'},
            'bar-widget': {'coeff': '+15', 'unit': 'MINUTES'},
            'facet-widget': {'coeff': '+3', 'unit': 'HOURS'},
        },
        '2DAYS': {
            'histogram-widget': {'coeff': '+30', 'unit': 'MINUTES'},
            'bucket-widget': {'coeff': '+30', 'unit': 'MINUTES'},
            'bar-widget': {'coeff': '+30', 'unit': 'MINUTES'},
            'facet-widget': {'coeff': '+6', 'unit': 'HOURS'},
        },
        '7DAYS': {
            'histogram-widget': {'coeff': '+3', 'unit': 'HOURS'},
            'bucket-widget': {'coeff': '+3', 'unit': 'HOURS'},
            'bar-widget': {'coeff': '+3', 'unit': 'HOURS'},
            'facet-widget': {'coeff': '+1', 'unit': 'DAYS'},
        },
        '1MONTHS': {
            'histogram-widget': {'coeff': '+12', 'unit': 'HOURS'},
            'bucket-widget': {'coeff': '+12', 'unit': 'HOURS'},
            'bar-widget': {'coeff': '+12', 'unit': 'HOURS'},
            'facet-widget': {'coeff': '+5', 'unit': 'DAYS'},
        },
        '3MONTHS': {
            'histogram-widget': {'coeff': '+1', 'unit': 'DAYS'},
            'bucket-widget': {'coeff': '+1', 'unit': 'DAYS'},
            'bar-widget': {'coeff': '+1', 'unit': 'DAYS'},
            'facet-widget': {'coeff': '+30', 'unit': 'DAYS'},
        },
        '1YEARS': {
            'histogram-widget': {'coeff': '+3', 'unit': 'DAYS'},
            'bucket-widget': {'coeff': '+3', 'unit': 'DAYS'},
            'bar-widget': {'coeff': '+3', 'unit': 'DAYS'},
            'facet-widget': {'coeff': '+12', 'unit': 'MONTHS'},
        },
        '2YEARS': {
            'histogram-widget': {'coeff': '+7', 'unit': 'DAYS'},
            'bucket-widget': {'coeff': '+7', 'unit': 'DAYS'},
            'bar-widget': {'coeff': '+7', 'unit': 'DAYS'},
            'facet-widget': {'coeff': '+3', 'unit': 'MONTHS'},
        },
        '10YEARS': {
            'histogram-widget': {'coeff': '+1', 'unit': 'MONTHS'},
            'bucket-widget': {'coeff': '+1', 'unit': 'MONTHS'},
            'bar-widget': {'coeff': '+1', 'unit': 'MONTHS'},
            'facet-widget': {'coeff': '+1', 'unit': 'YEARS'},
        }
    }

    time_field = collection['timeFilter'].get('field')

    if time_field and (collection['timeFilter']['value'] != 'all' or collection['timeFilter']['type'] == 'fixed'):
      # fqs overrides main time filter
      fq_time_ids = [fq['id'] for fq in query['fqs'] if fq['field'] == time_field]
      props['time_filter_overrides'] = fq_time_ids
      props['time_field'] = time_field

      if collection['timeFilter']['type'] == 'rolling':
        # Rolling window: relative NOW-based range with a preset gap table.
        props['field'] = collection['timeFilter']['field']
        props['from'] = 'NOW-%s' % collection['timeFilter']['value']
        props['to'] = 'NOW'
        props['gap'] = GAPS.get(collection['timeFilter']['value'])
      elif collection['timeFilter']['type'] == 'fixed':
        # Fixed window: absolute from/to; the gap is computed later.
        props['field'] = collection['timeFilter']['field']
        props['from'] = collection['timeFilter']['from']
        props['to'] = collection['timeFilter']['to']
        props['fixed'] = True

    return props

  def _get_time_filter_query(self, timeFilter, facet):
    """Build the start/end/gap trio of a date range facet from the time filter."""
    if 'fixed' in timeFilter:
      # Fixed window: derive the gap from the actual span of the range.
      props = {}
      stat_facet = {'min': timeFilter['from'], 'max': timeFilter['to']}
      _compute_range_facet(facet['widgetType'], stat_facet, props, stat_facet['min'], stat_facet['max'])
      gap = props['gap']
      unit = re.split(r'\d+', gap)[1]
      return {
        'start': '%(from)s/%(unit)s' % {'from': timeFilter['from'], 'unit': unit},
        'end': '%(to)s/%(unit)s' % {'to': timeFilter['to'], 'unit': unit},
        'gap': '%(gap)s' % props, # add a 'auto'
      }
    else:
      # Rolling window: look the gap up in the preset table by widget type.
      gap = timeFilter['gap'][facet['widgetType']]
      return {
        'start': '%(from)s/%(unit)s' % {'from': timeFilter['from'], 'unit': gap['unit']},
        'end': '%(to)s/%(unit)s' % {'to': timeFilter['to'], 'unit': gap['unit']},
        'gap': '%(coeff)s%(unit)s/%(unit)s' % gap, # add a 'auto'
      }

  def _get_fq(self, collection, query):
    """Build the 'fq' filter-query parameters from the time filter and facet filters."""
    params = ()
    timeFilter = {}

    if collection:
      timeFilter = self._get_range_borders(collection, query)
    # Only apply the global time filter when no facet filter already constrains the time field.
    if timeFilter and not timeFilter.get('time_filter_overrides'):
      params += (('fq', urllib.unquote(utf_quoter('%(field)s:[%(from)s TO %(to)s]' % timeFilter))),)

    # Merge facets queries on same fields
    grouped_fqs = groupby(query['fqs'], lambda x: (x['type'], x['field']))
    merged_fqs = []
    for key, group in grouped_fqs:
      field_fq = next(group)
      for fq in group:
        for f in fq['filter']:
          field_fq['filter'].append(f)
      merged_fqs.append(field_fq)

    for fq in merged_fqs:
      if fq['type'] == 'field':
        fields = fq['field'] if type(fq['field']) == list else [fq['field']] # 2D facets support
        for field in fields:
          f = []
          for _filter in fq['filter']:
            values = _filter['value'] if type(_filter['value']) == list else [_filter['value']] # 2D facets support
            if fields.index(field) < len(values): # Lowest common field denominator
              value = values[fields.index(field)]
              exclude = '-' if _filter['exclude'] else ''
              if value is not None and ' ' in force_unicode(value):
                # Values with spaces need quoting instead of the {!field} parser.
                value = force_unicode(value).replace('"', '\\"')
                f.append('%s%s:"%s"' % (exclude, field, value))
              else:
                f.append('%s{!field f=%s}%s' % (exclude, field, value))
          _params = '{!tag=%(id)s}' % fq + ' '.join(f)
          params += (('fq', urllib.unquote(utf_quoter(_params))),)
      elif fq['type'] == 'range':
        params += (('fq', '{!tag=%(id)s}' % fq + ' '.join([urllib.unquote(
                    utf_quoter('%s%s:[%s TO %s}' % ('-' if field['exclude'] else '', fq['field'], f['from'], f['to']))) for field, f in zip(fq['filter'], fq['properties'])])),)
      elif fq['type'] == 'range-up':
        params += (('fq', '{!tag=%(id)s}' % fq + ' '.join([urllib.unquote(
                    utf_quoter('%s%s:[%s TO %s}' % ('-' if field['exclude'] else '', fq['field'], f['from'] if fq['is_up'] else '*', '*' if fq['is_up'] else f['from'])))
                                                          for field, f in zip(fq['filter'], fq['properties'])])),)
      elif fq['type'] == 'map':
        _keys = fq.copy()
        _keys.update(fq['properties'])
        params += (('fq', '{!tag=%(id)s}' % fq + urllib.unquote(
                    utf_quoter('%(lat)s:[%(lat_sw)s TO %(lat_ne)s} AND %(lon)s:[%(lon_sw)s TO %(lon_ne)s}' % _keys))),)

    return params

  def query(self, collection, query):
    """
    Run a dashboard search against the collection: builds q, rows/start,
    classic facet.* parameters, JSON facets, fq filters, field list,
    highlighting and sorting, then issues the select request.
    """
    solr_query = {}

    solr_query['collection'] = collection['name']

    if query.get('download'):
      solr_query['rows'] = 1000
      solr_query['start'] = 0
    else:
      solr_query['rows'] = int(collection['template']['rows'] or 10)
      solr_query['start'] = int(query['start'])

    # Hard caps to protect the server from runaway pagination.
    solr_query['rows'] = min(solr_query['rows'], 1000)
    solr_query['start'] = min(solr_query['start'], 10000)

    params = self._get_params() + (
        ('q', self._get_q(query)),
        ('wt', 'json'),
        ('rows', solr_query['rows']),
        ('start', solr_query['start']),
    )

    if any(collection['facets']):
      params += (
        ('facet', 'true'),
        ('facet.mincount', 0),
        ('facet.limit', 10),
      )
      json_facets = {}

      timeFilter = self._get_range_borders(collection, query)

      for facet in collection['facets']:
        if facet['type'] == 'query':
          params += (('facet.query', '%s' % facet['field']),)
        elif facet['type'] == 'range' or facet['type'] == 'range-up':
          keys = {
              'id': '%(id)s' % facet,
              'field': facet['field'],
              'key': '%(field)s-%(id)s' % facet,
              'start': facet['properties']['start'],
              'end': facet['properties']['end'],
              'gap': facet['properties']['gap'],
              'mincount': int(facet['properties']['mincount'])
          }

          # Align the facet range with the dashboard time filter unless this
          # facet itself overrides the time filter.
          if timeFilter and timeFilter['time_field'] == facet['field'] and (facet['id'] not in timeFilter['time_filter_overrides'] or facet['widgetType'] != 'histogram-widget'):
            keys.update(self._get_time_filter_query(timeFilter, facet))

          params += (
             ('facet.range', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.range.start=%(start)s f.%(field)s.facet.range.end=%(end)s f.%(field)s.facet.range.gap=%(gap)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys),
          )
        elif facet['type'] == 'field':
          keys = {
              'id': '%(id)s' % facet,
              'field': facet['field'],
              'key': '%(field)s-%(id)s' % facet,
              # Fetch one extra value so the widget can tell whether more exist.
              'limit': int(facet['properties'].get('limit', 10)) + (1 if facet['widgetType'] == 'facet-widget' else 0),
              'mincount': int(facet['properties']['mincount'])
          }
          params += (
              ('facet.field', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys),
          )
        elif facet['type'] == 'nested':
          _f = {
              'field': facet['field'],
              'limit': int(facet['properties'].get('limit', 10)) + (1 if facet['widgetType'] == 'facet-widget' else 0),
              'mincount': int(facet['properties']['mincount'])
          }

          if 'start' in facet['properties']:
            _f.update({
                'type': 'range',
                'start': facet['properties']['start'],
                'end': facet['properties']['end'],
                'gap': facet['properties']['gap'],
            })
            if timeFilter and timeFilter['time_field'] == facet['field'] and (facet['id'] not in timeFilter['time_filter_overrides'] or facet['widgetType'] != 'bucket-widget'):
              _f.update(self._get_time_filter_query(timeFilter, facet))
          else:
            _f.update({
                'type': 'terms',
                'field': facet['field'],
                'excludeTags': facet['id']
            })

          if facet['properties']['facets']:
            if facet['properties']['facets'][0]['aggregate'] == 'count':
              _f['facet'] = {
                  'd2': {
                      'type': 'terms',
                      'field': '%(field)s' % facet['properties']['facets'][0],
                      'limit': int(facet['properties']['facets'][0].get('limit', 10)),
                      'mincount': int(facet['properties']['facets'][0]['mincount'])
                  }
              }
              if len(facet['properties']['facets']) > 1: # Get 3rd dimension calculation
                _f['facet']['d2']['facet'] = {
                    'd2': self._get_aggregate_function(facet['properties']['facets'][1])
                }
            else:
              _f['facet'] = {
                  'd2': self._get_aggregate_function(facet['properties']['facets'][0])
              }

          json_facets[facet['id']] = _f
        elif facet['type'] == 'function':
          json_facets[facet['id']] = self._get_aggregate_function(facet)
          json_facets['processEmpty'] = True
        elif facet['type'] == 'pivot':
          if facet['properties']['facets'] or facet['widgetType'] == 'map-widget':
            fields = facet['field']
            fields_limits = []
            for f in facet['properties']['facets']:
              fields_limits.append('f.%s.facet.limit=%s' % (f['field'], f['limit']))
              fields_limits.append('f.%s.facet.mincount=%s' % (f['field'], f['mincount']))
              fields += ',' + f['field']
            keys = {
                'id': '%(id)s' % facet,
                'key': '%(field)s-%(id)s' % facet,
                'field': facet['field'],
                'fields': fields,
                'limit': int(facet['properties'].get('limit', 10)),
                'mincount': int(facet['properties']['mincount']),
                'fields_limits': ' '.join(fields_limits)
            }
            params += (
                ('facet.pivot', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s %(fields_limits)s}%(fields)s' % keys),
            )

      if json_facets:
        params += (
            ('json.facet', json.dumps(json_facets)),
        )

    params += self._get_fq(collection, query)

    if collection['template']['fieldsSelected'] and collection['template']['isGridLayout']:
      # Fix: parenthesize the conditional so the selected fields are kept even
      # when there is no idField (the old precedence dropped them entirely).
      fields = set(collection['template']['fieldsSelected'] + ([collection['idField']] if collection['idField'] else []))
      # Add field if needed
      if collection['template']['leafletmap'].get('latitudeField'):
        fields.add(collection['template']['leafletmap']['latitudeField'])
      if collection['template']['leafletmap'].get('longitudeField'):
        fields.add(collection['template']['leafletmap']['longitudeField'])
      if collection['template']['leafletmap'].get('labelField'):
        fields.add(collection['template']['leafletmap']['labelField'])
      params += (('fl', urllib.unquote(utf_quoter(','.join(list(fields))))),)
    else:
      params += (('fl', '*'),)

    params += (
      ('hl', 'true'),
      ('hl.fl', '*'),
      ('hl.snippets', 5),
      ('hl.fragsize', 1000),
    )

    if collection['template']['fieldsSelected']:
      fields = []
      for field in collection['template']['fieldsSelected']:
        attribute_field = filter(lambda attribute: field == attribute['name'], collection['template']['fieldsAttributes'])
        if attribute_field:
          if attribute_field[0]['sort']['direction']:
            fields.append('%s %s' % (field, attribute_field[0]['sort']['direction']))
      if fields:
        params += (
          ('sort', ','.join(fields)),
        )

    response = self._root.get('%(collection)s/select' % solr_query, params)
    return self._get_json(response)


  def suggest(self, collection, query):
    """Query the collection's suggester component for completion candidates."""
    try:
      params = self._get_params() + (
          ('suggest', 'true'),
          ('suggest.build', 'true'),
          ('suggest.q', query['q']),
          ('wt', 'json'),
      )
      if query.get('dictionary'):
        params += (
            ('suggest.dictionary', query['dictionary']),
        )
      response = self._root.get('%s/suggest' % collection, params)
      return self._get_json(response)
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))
Exemple #48
0
class SparkJob(Application):
    """A YARN application wrapper that enriches a Spark job with executor
    metrics fetched from the Spark History Server."""

    def __init__(self, job, rm_api=None, hs_api=None):
        super(SparkJob, self).__init__(job, rm_api)
        self._resolve_tracking_url()
        # Metrics are only fetched once the app has actually started and a
        # History Server API client was supplied.
        if self.status not in ('NEW', 'SUBMITTED', 'ACCEPTED') and hs_api:
            self.history_server_api = hs_api
            self._get_metrics()

    @property
    def logs_url(self):
        # stdout link of an executor, or empty string when none is available.
        log_links = self.history_server_api.get_executors_loglinks(self)
        return log_links[
            'stdout'] if log_links and 'stdout' in log_links else ''

    @property
    def attempt_id(self):
        # Last path component of the tracking URL (after trimming slashes).
        return self.trackingUrl.strip('/').split('/')[-1]

    def _resolve_tracking_url(self):
        """Follow redirects on the tracking URL to find the job's actual UI URL."""
        # NOTE(review): `resp` is never assigned in the try block, so the
        # `finally` close below can never trigger — looks like leftover code.
        resp = None
        try:
            self._client = HttpClient(self.trackingUrl, logger=LOG)
            self._root = Resource(self._client)
            yarn_cluster = cluster.get_cluster_conf_for_job_submission()
            self._security_enabled = yarn_cluster.SECURITY_ENABLED.get()
            if self._security_enabled:
                self._client.set_kerberos_auth()

            self._client.set_verify(yarn_cluster.SSL_CERT_CA_VERIFY.get())
            actual_url = self._execute(self._root.resolve_redirect_url)

            # Drop a trailing 'jobs' path segment so trackingUrl points at the app root.
            # NOTE(review): replace() removes every 'jobs' occurrence in the URL,
            # not just the trailing segment — confirm URLs never contain 'jobs' elsewhere.
            if actual_url.strip('/').split('/')[-1] == 'jobs':
                actual_url = actual_url.strip('/').replace('jobs', '')
            self.trackingUrl = actual_url
            LOG.debug("SparkJob tracking URL: %s" % self.trackingUrl)
        except Exception as e:
            # Best-effort: keep the original trackingUrl on any failure.
            LOG.warn("Failed to resolve Spark Job's actual tracking URL: %s" %
                     e)
        finally:
            if resp is not None:
                resp.close()

    def _execute(self, function, *args, **kwargs):
        """Call *function*, logging instead of raising on failure; returns None on error."""
        response = None
        try:
            response = function(*args, **kwargs)
        except Exception as e:
            LOG.warn(
                'Spark resolve tracking URL returned a failed response: %s' %
                e)
        return response

    def _get_metrics(self):
        """Populate self.metrics with per-executor rows from the History Server."""
        self.metrics = {}
        try:
            executors = self.history_server_api.executors(self)
            if executors:
                # Column headers, aligned 1:1 with the row values appended below.
                self.metrics['headers'] = [
                    _('Executor Id'),
                    _('Address'),
                    _('RDD Blocks'),
                    _('Storage Memory'),
                    _('Disk Used'),
                    _('Active Tasks'),
                    _('Failed Tasks'),
                    _('Complete Tasks'),
                    _('Task Time'),
                    _('Input'),
                    _('Shuffle Read'),
                    _('Shuffle Write'),
                    _('Logs')
                ]
                self.metrics['executors'] = []
                for e in executors:
                    self.metrics['executors'].append([
                        e.get('id', 'N/A'),
                        e.get('hostPort', ''),
                        e.get('rddBlocks', ''),
                        '%s / %s' %
                        (big_filesizeformat(e.get('memoryUsed', 0)),
                         big_filesizeformat(e.get('maxMemory', 0))),
                        big_filesizeformat(e.get('diskUsed', 0)),
                        e.get('activeTasks', ''),
                        e.get('failedTasks', ''),
                        e.get('completedTasks', ''),
                        format_duration_in_millis(e.get('totalDuration', 0)),
                        big_filesizeformat(e.get('totalInputBytes', 0)),
                        big_filesizeformat(e.get('totalShuffleRead', 0)),
                        big_filesizeformat(e.get('totalShuffleWrite', 0)),
                        e.get('executorLogs', '')
                    ])
        except Exception as e:
            LOG.error('Failed to get Spark Job executors: %s' % e)
            # Prevent a nosedive. Don't create metrics if api changes or url is unreachable.

    def get_executors(self):
        """Return the executor metric rows as dicts, each tagged with a unique id and type."""
        executor_list = []
        if hasattr(self, 'metrics') and 'executors' in self.metrics:
            executors = self.metrics['executors']
            # Keys matching the positional row layout built in _get_metrics().
            headers = [
                'executor_id', 'address', 'rdd_blocks', 'storage_memory',
                'disk_used', 'active_tasks', 'failed_tasks', 'complete_tasks',
                'task_time', 'input', 'shuffle_read', 'shuffle_write', 'logs'
            ]
            for executor in executors:
                executor_data = dict(zip(headers, executor))
                executor_data.update({
                    'id':
                    executor_data['executor_id'] + '_executor_' + self.jobId,
                    'type':
                    'SPARK_EXECUTOR'
                })
                executor_list.append(executor_data)
        return executor_list
Exemple #49
0
class MapreduceApi(object):
  """Client for the MapReduce Application Master / History Server REST API,
  reached through the ResourceManager proxy. Job ids ('job_...') are mapped to
  application ids ('application_...') to build the proxy path.

  NOTE: this module targets Python 2 (`basestring` below).
  """

  def __init__(self, mr_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(mr_url, 'proxy')
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    self._thread_local = threading.local()  # To store user info

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "MapreduceApi at %s" % (self._url,)

  def _get_params(self):
    """Impersonation parameters: doAs the current user, plus user.name when Kerberos is off."""
    params = {}

    if self.username != DEFAULT_USER.get():  # We impersonate if needed
      params['doAs'] = self.username
      if not self._security_enabled:
        params['user.name'] = DEFAULT_USER.get()

    return params

  @property
  def url(self):
    # Base URL including the '/proxy' suffix.
    return self._url

  @property
  def username(self):
    # Falls back to the default user until setuser() has been called on this thread.
    try:
      return self._thread_local.user
    except AttributeError:
      return DEFAULT_USER.get()

  def setuser(self, user):
    """Set the per-thread acting user; returns the previous one."""
    curr = self.username
    self._thread_local.user = user
    return curr

  def job(self, user, job_id):
    """Fetch one job. NOTE(review): the `user` argument is unused here."""
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def counters(self, job_id):
    """Fetch the job counters, or None when the History Server answered with HTML."""
    app_id = job_id.replace('job', 'application')
    response = self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/counters' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})
    # If it hits the job history server, it will return HTML.
    # Simply return None in this case because there isn't much data there.
    if isinstance(response, basestring):
      return None
    else:
      return response

  def tasks(self, job_id):
    """List the tasks of a job."""
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def job_attempts(self, job_id):
    """List the attempts of a job."""
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/jobattempts' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def conf(self, job_id):
    """Fetch the configuration of a job."""
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/conf' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task(self, job_id, task_id):
    """Fetch one task of a job."""
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s' % {'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_counters(self, job_id, task_id):
    """Fetch the counters of one task.

    NOTE(review): unlike task()/tasks()/conf(), this method also normalizes
    'application_...' back to 'job_...' — the normalization is inconsistent
    across the class; confirm which callers pass application ids.
    """
    app_id = job_id.replace('job', 'application')
    job_id = job_id.replace('application', 'job')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/counters' % {'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempts(self, job_id, task_id):
    """List the attempts of one task."""
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts' % {'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempt(self, job_id, task_id, attempt_id):
    """Fetch one attempt of one task (normalizes an application id to a job id)."""
    app_id = job_id.replace('job', 'application')
    job_id = job_id.replace('application', 'job')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s' % {'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id, 'version': _API_VERSION}, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def kill(self, job_id):
    """Kill the job through the ResourceManager (the AM proxy cannot kill)."""
    app_id = job_id.replace('job', 'application')
    get_resource_manager(self.username).kill(app_id) # We need to call the RM
Exemple #50
0
class LivyClient(object):
    """REST client for an Apache Livy server: interactive sessions,
    statements, and batch jobs."""

    def __init__(self, livy_url):
        self._url = posixpath.join(livy_url)
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)
        self._security_enabled = SECURITY_ENABLED.get()
        self._csrf_enabled = CSRF_ENABLED.get()
        self._thread_local = threading.local()  # holds the per-thread acting user

        if self.security_enabled:
            self._client.set_kerberos_auth()

        if self.csrf_enabled:
            # Livy's CSRF protection requires this header on modifying requests.
            self._client.set_headers({'X-Requested-By': 'hue'})

        self._client.set_verify(SSL_CERT_CA_VERIFY.get())

    def __str__(self):
        """Short description of the endpoint this client talks to."""
        return "LivyClient at %s" % (self._url, )

    @property
    def url(self):
        """Base URL of the Livy server."""
        return self._url

    @property
    def security_enabled(self):
        """Whether Kerberos authentication is in use."""
        return self._security_enabled

    @property
    def csrf_enabled(self):
        """Whether the X-Requested-By CSRF header is sent."""
        return self._csrf_enabled

    @property
    def user(self):
        """The acting user previously registered via setuser() on this thread."""
        return self._thread_local.user

    def setuser(self, user):
        """Record the acting user (a User object or a plain username) for this thread."""
        self._thread_local.user = getattr(user, 'username', user)

    def _post_json(self, path, payload):
        """POST *payload* serialized as JSON to *path*."""
        return self._root.post(path,
                               data=json.dumps(payload),
                               contenttype=_JSON_CONTENT_TYPE)

    def _read_log(self, path, startFrom, size):
        """GET a log resource and flatten its 'log' lines into one string."""
        params = {}
        if startFrom is not None:
            params['from'] = startFrom
        if size is not None:
            params['size'] = size
        return '\n'.join(self._root.get(path, params=params)['log'])

    def get_status(self):
        """List the sessions known to the server."""
        return self._root.get('sessions')

    def get_log(self, uuid, startFrom=None, size=None):
        """Return the log of one session, newline-joined."""
        return self._read_log('sessions/%s/log' % uuid, startFrom, size)

    def create_session(self, **properties):
        """Open a new interactive session impersonating the current user."""
        properties['proxyUser'] = self.user.split('@')[0]  # strip any Kerberos realm
        if has_connectors():  # Only SQL supported via connectors currently
            properties['kind'] = 'sql'

        return self._post_json('sessions', properties)

    def get_sessions(self):
        """List the sessions known to the server."""
        return self._root.get('sessions')

    def get_session(self, uuid):
        """Fetch a single session by id."""
        return self._root.get('sessions/%s' % uuid)

    def get_statements(self, uuid):
        """List the statements submitted to a session."""
        return self._root.get('sessions/%s/statements' % uuid)

    def submit_statement(self, uuid, statement):
        """Run a code statement inside an existing session."""
        return self._post_json('sessions/%s/statements' % uuid, {'code': statement})

    def inspect(self, uuid, statement):
        """Inspect a code fragment inside an existing session."""
        return self._post_json('sessions/%s/inspect' % uuid, {'code': statement})

    def fetch_data(self, session, statement):
        """Fetch the state/result of one statement of a session."""
        return self._root.get('sessions/%s/statements/%s' % (session, statement))

    def cancel(self, session):
        """Interrupt whatever the session is currently executing."""
        return self._root.post('sessions/%s/interrupt' % session)

    def close(self, uuid):
        """Delete an interactive session."""
        return self._root.delete('sessions/%s' % uuid)

    def get_batches(self):
        """List the batch jobs known to the server."""
        return self._root.get('batches')

    def submit_batch(self, properties):
        """Submit a batch job impersonating the current user."""
        properties['proxyUser'] = self.user
        return self._post_json('batches', properties)

    def get_batch(self, uuid):
        """Fetch a single batch by id."""
        return self._root.get('batches/%s' % uuid)

    def get_batch_status(self, uuid):
        """Return only the state string of a batch."""
        return self._root.get('batches/%s/state' % uuid)['state']

    def get_batch_log(self, uuid, startFrom=None, size=None):
        """Return the log of one batch, newline-joined."""
        return self._read_log('batches/%s/log' % uuid, startFrom, size)

    def close_batch(self, uuid):
        """Delete a batch job."""
        return self._root.delete('batches/%s' % uuid)
Exemple #51
0
class SqoopClient(object):

  STATUS_GOOD = ('FINE', 'ACCEPTABLE')
  STATUS_BAD = ('UNACCEPTABLE', 'FAILURE_ON_SUBMIT')

  def __init__(self, url, username, language='en', ssl_cert_ca_verify=False):
    """Build a Sqoop REST client.

    url                -- base URL of the Sqoop server
    username           -- value sent in the 'sqoop-user-name' header
    language           -- value sent in the 'Accept-Language' header
    ssl_cert_ca_verify -- whether to verify the server's SSL certificate
    """
    self._url = url
    self._client = HttpClient(self._url, logger=LOG)
    self._root = SqoopResource(self._client)
    self._language = language
    self._username = username

    # Evaluate the security setting once instead of calling the config
    # helper twice as the original code did.
    self._security_enabled = has_sqoop_has_security()
    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "SqoopClient at %s with security %s" % (self._url, self._security_enabled)

  @property
  def url(self):
    return self._url

  @property
  def headers(self):
    return {
      'Accept': 'application/json',
      'Accept-Language': self._language,
      'sqoop-user-name': self._username
    }

  def get_version(self):
    return self._root.get('version', headers=self.headers)

  def get_driver(self):
    """Fetch the server-side driver description as a Driver object."""
    response = self._root.get('%s/driver' % API_VERSION, headers=self.headers)
    return Driver.from_dict(response)

  def get_connectors(self):
    """Return every connector the server knows as Connector objects."""
    response = self._root.get('%s/connectors' % API_VERSION, headers=self.headers)
    return [Connector.from_dict(entry) for entry in response['connectors']]

  def get_connector(self, connector_id):
    """Fetch a single connector by numeric id; None when the payload is empty."""
    response = self._root.get('%s/connector/%d/' % (API_VERSION, connector_id),
                              headers=self.headers)
    connector_dict = response['connector']
    if not connector_dict:
      return None
    return Connector.from_dict(connector_dict)

  def get_links(self):
    """Return every stored link as Link objects."""
    response = self._root.get('%s/links' % API_VERSION, headers=self.headers)
    return [Link.from_dict(entry) for entry in response['links']]

  def get_link(self, link_id):
    """Fetch a single link by numeric id; None when the payload is empty."""
    response = self._root.get('%s/link/%d/' % (API_VERSION, link_id),
                              headers=self.headers)
    link_dict = response['link']
    if not link_dict:
      return None
    return Link.from_dict(link_dict)

  def create_link(self, link):
    """Create *link* on the server and return it with its new id.

    Raises SqoopException when the server reports validation errors.
    """
    now_ms = int(round(time.time() * 1000))
    link.creation_date = now_ms
    link.update_date = now_ms
    resp = self._root.post('%s/link/' % API_VERSION,
                           data=json.dumps({'link': link.to_dict()}),
                           headers=self.headers)

    # The server answers {'validation-result': [...]}; any non-empty
    # entry in the list signals a validation failure.
    if any(resp['validation-result']):
      raise SqoopException.from_dicts(resp['validation-result'])

    link.id = resp['id']
    return link

  def update_link(self, link):
    """Update *link* on the server, backfilling its config if missing.

    Raises SqoopException when the server reports validation errors.
    """
    if not link.link_config_values:
      link.link_config_values = self.get_connectors()[0].link_config
    link.updated = int(round(time.time() * 1000))
    resp = self._root.put('%s/link/%d/' % (API_VERSION, link.id),
                          data=json.dumps({'link': link.to_dict()}),
                          headers=self.headers)

    # The server answers {'validation-result': [...]}; any non-empty
    # entry in the list signals a validation failure.
    if any(resp['validation-result']):
      raise SqoopException.from_dicts(resp['validation-result'])

    return link

  def delete_link(self, link):
    """Delete *link* on the server. Always returns None."""
    # The response body carries nothing useful; discard it instead of
    # binding it to an unused local as the original code did.
    self._root.delete('%s/link/%d/' % (API_VERSION, link.id), headers=self.headers)
    return None

  def get_jobs(self):
    """Return every stored job as Job objects."""
    response = self._root.get('%s/jobs' % API_VERSION, headers=self.headers)
    return [Job.from_dict(entry) for entry in response['jobs']]

  def get_job(self, job_id):
    """Fetch a single job by numeric id; None when the payload is empty."""
    response = self._root.get('%s/job/%d/' % (API_VERSION, job_id),
                              headers=self.headers)
    job_dict = response['job']
    if not job_dict:
      return None
    return Job.from_dict(job_dict)

  def create_job(self, job):
    """Create *job* on the server and return it with its new id.

    Missing FROM/TO config sections are backfilled from the first
    connector and a missing driver config from the driver document.
    Raises SqoopException when the server omits an 'id' in its response.
    """
    if not job.from_config_values or not job.to_config_values:
      # One HTTP round trip instead of two when both sections are missing.
      default_job_config = self.get_connectors()[0].job_config
      if not job.from_config_values:
        job.from_config_values = default_job_config['FROM']
      if not job.to_config_values:
        job.to_config_values = default_job_config['TO']
    if not job.driver_config_values:
      job.driver_config_values = self.get_driver().job_config
    job.creation_date = int(round(time.time() * 1000))
    job.update_date = job.creation_date
    resp = self._root.post('%s/job/' % API_VERSION,
                           data=json.dumps({'job': job.to_dict()}),
                           headers=self.headers)
    if 'id' not in resp:
      raise SqoopException.from_dicts(resp['validation-result'])
    job.id = resp['id']
    return job

  def update_job(self, job):
    """Update *job* on the server, backfilling missing config sections.

    Raises SqoopException when the server reports validation errors
    (any non-empty entry in the 'validation-result' list).
    """
    if not job.from_config_values or not job.to_config_values:
      # One HTTP round trip instead of two when both sections are missing.
      default_job_config = self.get_connectors()[0].job_config
      if not job.from_config_values:
        job.from_config_values = default_job_config['FROM']
      if not job.to_config_values:
        job.to_config_values = default_job_config['TO']
    if not job.driver_config_values:
      job.driver_config_values = self.get_driver().job_config
    job.updated = int(round(time.time() * 1000))
    resp = self._root.put('%s/job/%d/' % (API_VERSION, job.id),
                          data=json.dumps({'job': job.to_dict()}),
                          headers=self.headers)

    # Server responds with {'validation-result': [{}, ...]}; a non-empty
    # entry means the update was rejected.
    if any(resp['validation-result']):
      raise SqoopException.from_dicts(resp['validation-result'])

    return job

  def delete_job(self, job):
    """Delete *job* on the server. Always returns None.

    NOTE(review): the endpoint now matches the '%d'-formatted,
    trailing-slash form used by every other job call ('%s/job/%d/');
    previously it was '%s/job/%s' — confirm the server treats both alike.
    """
    self._root.delete('%s/job/%d/' % (API_VERSION, job.id), headers=self.headers)
    return None

  def get_job_status(self, job):
    """Return the current Submission status of *job*."""
    response = self._root.get('%s/job/%d/status' % (API_VERSION, job.id),
                              headers=self.headers)
    return Submission.from_dict(response['submission'])

  def start_job(self, job):
    """Start *job* and return the resulting Submission.

    Raises SqoopSubmissionException when the submission status is bad.
    """
    response = self._root.put('%s/job/%d/start' % (API_VERSION, job.id),
                              headers=self.headers)
    submission = response['submission']
    if submission['status'] in SqoopClient.STATUS_BAD:
      raise SqoopSubmissionException.from_dict(submission)
    return Submission.from_dict(submission)

  def stop_job(self, job):
    """Ask the server to stop *job* and return the resulting Submission."""
    response = self._root.put('%s/job/%d/stop' % (API_VERSION, job.id),
                              headers=self.headers)
    return Submission.from_dict(response['submission'])

  def get_submissions(self):
    """Return every submission known to the server as Submission objects."""
    response = self._root.get('%s/submissions' % API_VERSION, headers=self.headers)
    return [Submission.from_dict(entry) for entry in response['submissions']]

  def set_user(self, user):
    self._user = user

  def set_language(self, language):
    self._language = language