Example #1
def setUp(self):
  self.user = '******'
  self.interpreter = {
      'name': 'livy',
      'options': {
        'api_url': 'http://gethue.com:8998'
      },
    }
  self.api = SparkApi(self.user, self.interpreter)
Example #2
  def setUp(self):
    self.client = make_logged_in_client(username="******", groupname="default", recreate=True, is_superuser=False)
    self.user = User.objects.get(username="******")

    self.interpreter = {
        'name': 'livy',
        'options': {
          'api_url': 'http://gethue.com:8998'
        },
      }
    self.api = SparkApi(self.user, self.interpreter)
Example #3
def notebook(request):
    notebook_id = request.GET.get('notebook')

    is_yarn_mode = False
    try:
        from spark.conf import LIVY_SERVER_SESSION_KIND
        is_yarn_mode = LIVY_SERVER_SESSION_KIND.get()
    except:
        LOG.exception('Spark is not enabled')

    return render(
        'notebook.mako', request, {
            'editor_id': notebook_id or None,
            'notebooks_json': '{}',
            'options_json': json.dumps({
                'languages': get_interpreters(request.user),
                'session_properties': SparkApi.get_properties(),
                'is_optimizer_enabled': has_optimizer(),
                'is_navigator_enabled': has_navigator(),
                'editor_type': 'notebook'
            }),
            'is_yarn_mode': is_yarn_mode,
        })
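
For context, a view like this is normally exercised through Hue's logged-in test client, the same helper used in Example #2. A minimal, hypothetical smoke test (the import path and the '/notebook/' URL are assumptions, not taken from these examples):

    from desktop.lib.django_test_util import make_logged_in_client  # import path assumed from Hue's test utilities

    def test_notebook_page_renders():
        # Log in a plain (non-superuser) test user and request the notebook page.
        client = make_logged_in_client(username="test", groupname="default", recreate=True, is_superuser=False)
        response = client.get('/notebook/')  # URL chosen for illustration only
        assert response.status_code == 200
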
Example #4
def notebook(request):
    notebook_id = request.GET.get('notebook')

    if notebook_id:
        notebook = Notebook(document=Document2.objects.get(id=notebook_id))
    else:
        notebook = Notebook()

    is_yarn_mode = False
    try:
        from spark.conf import LIVY_SERVER_SESSION_KIND
        is_yarn_mode = LIVY_SERVER_SESSION_KIND.get()
    except:
        LOG.exception('Spark is not enabled')

    return render(
        'notebook.mako', request, {
            'notebooks_json': json.dumps([notebook.get_data()]),
            'options_json': json.dumps({
                'languages': get_interpreters(request.user),
                'session_properties': SparkApi.get_properties(),
                'is_optimizer_enabled': has_optimizer(),
            }),
            'is_yarn_mode': is_yarn_mode,
        })
Example #5
File: base.py Project: wdai-aa/hue
def get_api(request, snippet):
  from notebook.connectors.hiveserver2 import HS2Api
  from notebook.connectors.jdbc import JdbcApi
  from notebook.connectors.rdbms import RdbmsApi
  from notebook.connectors.pig_batch import PigApi
  from notebook.connectors.spark_shell import SparkApi
  from notebook.connectors.spark_batch import SparkBatchApi
  from notebook.connectors.text import TextApi

  interpreter = [interpreter for interpreter in get_interpreters(request.user) if interpreter['type'] == snippet['type']]
  if not interpreter:
    raise PopupException(_('Snippet type %(type)s is not configured in hue.ini') % snippet)
  interpreter = interpreter[0]
  interface = interpreter['interface']

  if interface == 'hiveserver2':
    return HS2Api(user=request.user)
  elif interface == 'livy':
    return SparkApi(request.user)
  elif interface == 'livy-batch':
    return SparkBatchApi(request.user)
  elif interface == 'text' or interface == 'markdown':
    return TextApi(request.user)
  elif interface == 'rdbms':
    return RdbmsApi(request.user, interpreter=snippet['type'])
  elif interface == 'jdbc':
    return JdbcApi(request.user, interpreter=interpreter)
  elif interface == 'pig':
    return PigApi(user=request.user, request=request)
  else:
    raise PopupException(_('Notebook connector interface not recognized: %s') % interface)
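
To show how a dispatcher like this is consumed, here is a minimal sketch of a call site (the snippet dict, the 'pyspark' type and the Django request object are illustrative; they assume an interpreter with interface 'livy' is configured in hue.ini):

  snippet = {'type': 'pyspark'}    # must match an interpreter type returned by get_interpreters()
  api = get_api(request, snippet)  # resolves to SparkApi(request.user) when the interface is 'livy'
  session = api.create_session(lang=snippet['type'], properties=None)  # as exercised by the TestSparkApi examples below
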
Example #6
File: views.py Project: ziq211/hue
def notebook(request, is_embeddable=False):
    if not SHOW_NOTEBOOKS.get() or not request.user.has_hue_permission(
            action="access", app='notebook'):
        return serve_403_error(request)

    notebook_id = request.GET.get('notebook', request.GET.get('editor'))

    is_yarn_mode = False
    try:
        from spark.conf import LIVY_SERVER_SESSION_KIND
        is_yarn_mode = LIVY_SERVER_SESSION_KIND.get()
    except:
        LOG.exception('Spark is not enabled')

    return render(
        'notebook.mako', request, {
            'editor_id': notebook_id or None,
            'notebooks_json': '{}',
            'is_embeddable': request.GET.get('is_embeddable', False),
            'options_json': json.dumps({
                'languages': get_ordered_interpreters(request.user),
                'session_properties': SparkApi.get_properties(),
                'is_optimizer_enabled': has_optimizer(),
                'is_wa_enabled': has_workload_analytics(),
                'is_navigator_enabled': has_catalog(request.user),
                'editor_type': 'notebook'
            }),
            'is_yarn_mode': is_yarn_mode,
        })
Example #7
def notebook(request, is_embeddable=False):
    notebook_id = request.GET.get("notebook")

    is_yarn_mode = False
    try:
        from spark.conf import LIVY_SERVER_SESSION_KIND

        is_yarn_mode = LIVY_SERVER_SESSION_KIND.get()
    except:
        LOG.exception("Spark is not enabled")

    template = "notebook.mako"
    if is_embeddable:
        template = "notebook_embeddable.mako"

    return render(
        template,
        request,
        {
            "editor_id": notebook_id or None,
            "notebooks_json": "{}",
            "options_json": json.dumps(
                {
                    "languages": get_ordered_interpreters(request.user),
                    "session_properties": SparkApi.get_properties(),
                    "is_optimizer_enabled": has_optimizer(),
                    "is_navigator_enabled": has_navigator(request.user),
                    "editor_type": "notebook",
                }
            ),
            "is_yarn_mode": is_yarn_mode,
        },
    )
Example #8
def get_api(user, snippet, fs, jt):
  from notebook.connectors.hiveserver2 import HS2Api
  from notebook.connectors.jdbc import JdbcApi
  from notebook.connectors.mysql import MySqlApi
  from notebook.connectors.pig_batch import PigApi
  from notebook.connectors.spark_shell import SparkApi
  from notebook.connectors.spark_batch import SparkBatchApi
  from notebook.connectors.text import TextApi


  interpreter = [interpreter for interpreter in get_interpreters() if interpreter['type'] == snippet['type']]
  if not interpreter:
    raise PopupException(_('Snippet type %(type)s is not configured in hue.ini') % snippet)
  interpreter = interpreter[0]
  interface = interpreter['interface']

  if interface == 'hiveserver2':
    return HS2Api(user)
  elif interface == 'livy':
    return SparkApi(user)
  elif interface == 'livy-batch':
    return SparkBatchApi(user)
  elif interface == 'text':
    return TextApi(user)
  elif interface == 'mysql':
    return MySqlApi(user)
  elif interface == 'jdbc':
    return JdbcApi(user, interpreter=interpreter)
  elif interface == 'pig':
    return PigApi(user, fs=fs, jt=jt)
  else:
    raise PopupException(_('Notebook connector interface not recognized: %s') % interface)
Example #9
class TestSparkApi(object):

  def setUp(self):
    self.user = '******'
    self.api = SparkApi(self.user)


  def test_create_session_plain(self):
    lang = 'pyspark'
    properties = None

    with patch('notebook.connectors.spark_shell.get_spark_api') as get_spark_api:
      get_spark_api.return_value = Mock(
        create_session=Mock(
          return_value={'id': '1'}
        ),
        get_session=Mock(
          return_value={'state': 'idle', 'log': ''}
        )
      )

      session = self.api.create_session(lang=lang, properties=properties)

      assert_equal(session['type'], 'pyspark')
      assert_equal(session['id'], '1')

      files_properties = [prop for prop in session['properties'] if prop['name'] == 'files']
      assert_true(files_properties, session['properties'])
      assert_equal(files_properties[0]['value'], [], session['properties'])


  def test_get_jobs(self):
    local_jobs = [
      {'url': u'http://172.21.1.246:4040/jobs/job/?id=0', 'name': u'0'}
    ]
    jobs = self.api._get_standalone_jobs(LIVY_STANDALONE_LOG)
    assert_equal(jobs, local_jobs, jobs)

    yarn_jobs = [
      {'url': u'http://huetest-1.test.com:8088/proxy/application_1444070328046_0002/', 'name': u'application_1444070328046_0002'}
    ]
    jobs = self.api._get_yarn_jobs(LIVY_YARN_LOG)
    assert_equal(jobs, yarn_jobs, jobs)
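
These test snippets omit their module-level imports. Roughly, they rely on something like the following (a sketch; the mock import varies between the mock backport and unittest.mock depending on the Python version):

  from mock import patch, Mock  # unittest.mock on Python 3
  from nose.tools import assert_equal, assert_true, assert_raises
  from notebook.connectors.spark_shell import SparkApi
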
Example #10
def get_api(request, snippet):
  from notebook.connectors.dataeng import DataEngApi
  from notebook.connectors.hiveserver2 import HS2Api
  from notebook.connectors.jdbc import JdbcApi
  from notebook.connectors.rdbms import RdbmsApi
  from notebook.connectors.oozie_batch import OozieApi
  from notebook.connectors.solr import SolrApi
  from notebook.connectors.spark_shell import SparkApi
  from notebook.connectors.spark_batch import SparkBatchApi
  from notebook.connectors.text import TextApi

  if snippet.get('wasBatchExecuted'):
    return OozieApi(user=request.user, request=request)

  interpreter = [interpreter for interpreter in get_ordered_interpreters(request.user) if interpreter['type'] == snippet['type']]
  if not interpreter:
    raise PopupException(_('Snippet type %(type)s is not configured in hue.ini') % snippet)
  interpreter = interpreter[0]
  interface = interpreter['interface']

  # Multi cluster
  cluster = Cluster(request.user)
  if cluster and cluster.get_type() == 'dataeng':
    interface = 'dataeng'

  if interface == 'hiveserver2':
    return HS2Api(user=request.user, request=request)
  elif interface == 'oozie':
    return OozieApi(user=request.user, request=request)
  elif interface == 'livy':
    return SparkApi(request.user)
  elif interface == 'livy-batch':
    return SparkBatchApi(request.user)
  elif interface == 'text' or interface == 'markdown':
    return TextApi(request.user)
  elif interface == 'rdbms':
    return RdbmsApi(request.user, interpreter=snippet['type'])
  elif interface == 'dataeng':
    return DataEngApi(user=request.user, request=request, cluster_name=cluster.get_interface())
  elif interface == 'jdbc':
    return JdbcApi(request.user, interpreter=interpreter)
  elif interface == 'solr':
    return SolrApi(request.user, interpreter=interpreter)
  elif interface == 'pig':
    return OozieApi(user=request.user, request=request) # Backward compatibility until Hue 4
  else:
    raise PopupException(_('Notebook connector interface not recognized: %s') % interface)
Example #11
def notebook(request):
  notebook_id = request.GET.get('notebook')

  is_yarn_mode = False
  try:
    from spark.conf import LIVY_SERVER_SESSION_KIND
    is_yarn_mode = LIVY_SERVER_SESSION_KIND.get()
  except:
    LOG.exception('Spark is not enabled')

  return render('notebook.mako', request, {
      'editor_id': notebook_id or None,
      'notebooks_json': '{}',
      'options_json': json.dumps({
          'languages': get_interpreters(request.user),
          'session_properties': SparkApi.get_properties(),
          'is_optimizer_enabled': has_optimizer(),
      }),
      'is_yarn_mode': is_yarn_mode,
  })
Example #12
def notebook(request):
  notebook_id = request.GET.get('notebook')

  if notebook_id:
    notebook = Notebook(document=Document2.objects.get(id=notebook_id))
  else:
    notebook = Notebook()

  is_yarn_mode = False
  try:
    from spark.conf import LIVY_SERVER_SESSION_KIND
    is_yarn_mode = LIVY_SERVER_SESSION_KIND.get()
  except:
    LOG.exception('Spark is not enabled')

  return render('notebook.mako', request, {
      'notebooks_json': json.dumps([notebook.get_data()]),
      'options_json': json.dumps({
          'languages': get_interpreters(request.user),
          'session_properties': SparkApi.get_properties(),
          'is_optimizer_enabled': has_optimizer(),
      }),
      'is_yarn_mode': is_yarn_mode,
  })
Example #13
def get_api(request, snippet):
    from notebook.connectors.oozie_batch import OozieApi

    if snippet.get('wasBatchExecuted'):
        return OozieApi(user=request.user, request=request)

    if snippet['type'] == 'report':
        snippet['type'] = 'impala'

    interpreter = [
        interpreter for interpreter in get_ordered_interpreters(request.user)
        if interpreter['type'] == snippet['type']
    ]
    if not interpreter:
        if snippet['type'] == 'hbase':
            interpreter = [{
                'name': 'hbase',
                'type': 'hbase',
                'interface': 'hbase',
                'options': {},
                'is_sql': False
            }]
        elif snippet['type'] == 'kafka':
            interpreter = [{
                'name': 'kafka',
                'type': 'kafka',
                'interface': 'kafka',
                'options': {},
                'is_sql': False
            }]
        elif snippet['type'] == 'solr':
            interpreter = [{
                'name': 'solr',
                'type': 'solr',
                'interface': 'solr',
                'options': {},
                'is_sql': False
            }]
        else:
            raise PopupException(
                _('Snippet type %(type)s is not configured in hue.ini') %
                snippet)

    interpreter = interpreter[0]
    interface = interpreter['interface']

    # Multi cluster
    cluster = json.loads(request.POST.get('cluster', '""'))  # Via Catalog API
    if cluster == 'undefined':
        cluster = None
    if not cluster and snippet.get('compute'):  # Via notebook.ko.js
        cluster = snippet.get('compute').get('id')
    if cluster and 'crn:altus:dataware:' in cluster:
        interface = 'altus-adb'
    if cluster:
        LOG.info('Selected cluster %s' % cluster)

    if interface == 'hiveserver2':
        from notebook.connectors.hiveserver2 import HS2Api
        return HS2Api(user=request.user, request=request, cluster=cluster)
    elif interface == 'oozie':
        return OozieApi(user=request.user, request=request)
    elif interface == 'livy':
        from notebook.connectors.spark_shell import SparkApi
        return SparkApi(request.user)
    elif interface == 'livy-batch':
        from notebook.connectors.spark_batch import SparkBatchApi
        return SparkBatchApi(request.user)
    elif interface == 'text' or interface == 'markdown':
        from notebook.connectors.text import TextApi
        return TextApi(request.user)
    elif interface == 'rdbms':
        from notebook.connectors.rdbms import RdbmsApi
        return RdbmsApi(request.user, interpreter=snippet['type'])
    elif interface == 'altus-adb':
        from notebook.connectors.altus_adb import AltusAdbApi
        return AltusAdbApi(user=request.user,
                           cluster_name=cluster,
                           request=request)
    elif interface == 'dataeng':
        from notebook.connectors.dataeng import DataEngApi
        return DataEngApi(user=request.user,
                          request=request,
                          cluster_name=cluster.get('name'))
    elif interface == 'jdbc' or interface == 'teradata':
        from notebook.connectors.jdbc import JdbcApi
        return JdbcApi(request.user, interpreter=interpreter)
    elif interface == 'sqlalchemy':
        from notebook.connectors.sqlalchemyapi import SqlAlchemyApi
        return SqlAlchemyApi(request.user, interpreter=interpreter)
    elif interface == 'solr':
        from notebook.connectors.solr import SolrApi
        return SolrApi(request.user, interpreter=interpreter)
    elif interface == 'hbase':
        from notebook.connectors.hbase import HBaseApi
        return HBaseApi(request.user)
    elif interface == 'kafka':
        from notebook.connectors.kafka import KafkaApi
        return KafkaApi(request.user)
    elif interface == 'pig':
        return OozieApi(user=request.user,
                        request=request)  # Backward compatibility until Hue 4
    else:
        raise PopupException(
            _('Notebook connector interface not recognized: %s') % interface)
Example #14
def get_api(request, snippet):
    from notebook.connectors.oozie_batch import OozieApi

    if snippet.get('wasBatchExecuted'):
        return OozieApi(user=request.user, request=request)

    if snippet['type'] == 'report':
        snippet['type'] = 'impala'

    interpreter = [
        interpreter for interpreter in get_ordered_interpreters(request.user)
        if snippet['type'] in (interpreter['type'], interpreter['interface'])
    ]
    if not interpreter:
        if snippet['type'] == 'hbase':
            interpreter = [{
                'name': 'hbase',
                'type': 'hbase',
                'interface': 'hbase',
                'options': {},
                'is_sql': False
            }]
        elif snippet['type'] == 'kafka':
            interpreter = [{
                'name': 'kafka',
                'type': 'kafka',
                'interface': 'kafka',
                'options': {},
                'is_sql': False
            }]
        elif snippet['type'] == 'solr':
            interpreter = [{
                'name': 'solr',
                'type': 'solr',
                'interface': 'solr',
                'options': {},
                'is_sql': False
            }]
        elif snippet['type'] == 'custom':
            interpreter = [{
                'name': snippet['name'],
                'type': snippet['type'],
                'interface': snippet['interface'],
                'options': snippet.get('options', {}),
                'is_sql': False
            }]
        else:
            raise PopupException(
                _('Snippet type %(type)s is not configured.') % snippet)

    interpreter = interpreter[0]
    interface = interpreter['interface']

    if CONNECTORS.IS_ENABLED.get():
        cluster = {
            'connector': snippet['type'],
            'id': interpreter['type'],
        }
        snippet['type'] = snippet['type'].split('-', 2)[0]
        cluster.update(interpreter['options'])
    # Multi cluster
    elif has_multi_cluster():
        cluster = json.loads(request.POST.get(
            'cluster',
            '""'))  # Via Catalog autocomplete API or Notebook create sessions
        if cluster == '""' or cluster == 'undefined':
            cluster = None
        if not cluster and snippet.get('compute'):  # Via notebook.ko.js
            cluster = snippet['compute']
    else:
        cluster = None

    cluster_name = cluster.get('id') if cluster else None

    if cluster and 'altus:dataware:k8s' in cluster_name:
        interface = 'hiveserver2'
    elif cluster and 'crn:altus:dataware:' in cluster_name:
        interface = 'altus-adb'
    elif cluster and 'crn:altus:dataeng:' in cluster_name:
        interface = 'dataeng'

    LOG.info('Selected cluster %s %s interface %s' %
             (cluster_name, cluster, interface))
    snippet['interface'] = interface

    if interface.startswith('hiveserver2') or interface == 'hms':
        from notebook.connectors.hiveserver2 import HS2Api
        return HS2Api(user=request.user,
                      request=request,
                      cluster=cluster,
                      interface=interface)
    elif interface == 'oozie':
        return OozieApi(user=request.user, request=request)
    elif interface == 'livy':
        from notebook.connectors.spark_shell import SparkApi
        return SparkApi(request.user)
    elif interface == 'livy-batch':
        from notebook.connectors.spark_batch import SparkBatchApi
        return SparkBatchApi(request.user)
    elif interface == 'text' or interface == 'markdown':
        from notebook.connectors.text import TextApi
        return TextApi(request.user)
    elif interface == 'rdbms':
        from notebook.connectors.rdbms import RdbmsApi
        return RdbmsApi(request.user,
                        interpreter=snippet['type'],
                        query_server=snippet.get('query_server'))
    elif interface == 'altus-adb':
        from notebook.connectors.altus_adb import AltusAdbApi
        return AltusAdbApi(user=request.user,
                           cluster_name=cluster_name,
                           request=request)
    elif interface == 'dataeng':
        from notebook.connectors.dataeng import DataEngApi
        return DataEngApi(user=request.user,
                          request=request,
                          cluster_name=cluster_name)
    elif interface == 'jdbc':
        if interpreter['options'] and interpreter['options'].get(
                'url', '').find('teradata') >= 0:
            from notebook.connectors.jdbc_teradata import JdbcApiTeradata
            return JdbcApiTeradata(request.user, interpreter=interpreter)
        if interpreter['options'] and interpreter['options'].get(
                'url', '').find('awsathena') >= 0:
            from notebook.connectors.jdbc_athena import JdbcApiAthena
            return JdbcApiAthena(request.user, interpreter=interpreter)
        elif interpreter['options'] and interpreter['options'].get(
                'url', '').find('presto') >= 0:
            from notebook.connectors.jdbc_presto import JdbcApiPresto
            return JdbcApiPresto(request.user, interpreter=interpreter)
        elif interpreter['options'] and interpreter['options'].get(
                'url', '').find('clickhouse') >= 0:
            from notebook.connectors.jdbc_clickhouse import JdbcApiClickhouse
            return JdbcApiClickhouse(request.user, interpreter=interpreter)
        else:
            from notebook.connectors.jdbc import JdbcApi
            return JdbcApi(request.user, interpreter=interpreter)
    elif interface == 'teradata':
        from notebook.connectors.jdbc import JdbcApiTeradata
        return JdbcApiTeradata(request.user, interpreter=interpreter)
    elif interface == 'athena':
        from notebook.connectors.jdbc import JdbcApiAthena
        return JdbcApiAthena(request.user, interpreter=interpreter)
    elif interface == 'presto':
        from notebook.connectors.jdbc_presto import JdbcApiPresto
        return JdbcApiPresto(request.user, interpreter=interpreter)
    elif interface == 'sqlalchemy':
        from notebook.connectors.sqlalchemyapi import SqlAlchemyApi
        return SqlAlchemyApi(request.user, interpreter=interpreter)
    elif interface == 'solr':
        from notebook.connectors.solr import SolrApi
        return SolrApi(request.user, interpreter=interpreter)
    elif interface == 'hbase':
        from notebook.connectors.hbase import HBaseApi
        return HBaseApi(request.user)
    elif interface == 'kafka':
        from notebook.connectors.kafka import KafkaApi
        return KafkaApi(request.user)
    elif interface == 'pig':
        return OozieApi(user=request.user,
                        request=request)  # Backward compatibility until Hue 4
    else:
        raise PopupException(
            _('Notebook connector interface not recognized: %s') % interface)
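
All of these dispatchers pick a connector from the interpreter's 'interface' field. Inferred from the fallback literals above and the TestSparkApi fixtures, an interpreter entry looks roughly like this (values are illustrative only):

    interpreter = {
        'name': 'livy',                                    # display name
        'type': 'pyspark',                                 # matched against snippet['type']
        'interface': 'livy',                               # selects SparkApi in the dispatch above
        'options': {'api_url': 'http://gethue.com:8998'},  # connector-specific options, as in Example #1
        'is_sql': False,
    }
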
Example #15
class TestSparkApi(object):

  def setUp(self):
    self.user = '******'
    self.interpreter = {
        'name': 'livy',
        'options': {
          'api_url': 'http://gethue.com:8998'
        },
      }
    self.api = SparkApi(self.user, self.interpreter)


  def test_get_api(self):
    lang = 'pyspark'
    properties = None

    # with patch('notebook.connectors.spark_shell.get_spark_api') as get_spark_api:
    spark_api = self.api.get_api()
    assert_equal(spark_api.__class__.__name__, 'LivyClient')

  def test_get_livy_props_method(self):
    test_properties = [{
        "name": "files",
        "value": 'file_a,file_b,file_c',
      }]
    props = self.api.get_livy_props('scala', test_properties)
    assert_equal(props['files'], ['file_a', 'file_b', 'file_c'])

  def test_create_session_with_config(self):
    lang = 'pyspark'
    properties = None

    with patch('notebook.connectors.spark_shell.get_spark_api') as get_spark_api:
      with patch('notebook.connectors.spark_shell.DefaultConfiguration') as DefaultConfiguration:
        with patch('notebook.connectors.spark_shell.USE_DEFAULT_CONFIGURATION') as USE_DEFAULT_CONFIGURATION:
          DefaultConfiguration.objects.get_configuration_for_user.return_value = Mock(
                properties_list=[
                  {'multiple': False, 'name': 'driverCores', 'defaultValue': 1, 'value': 2, 'nice_name': 'Driver Cores',
                   'help_text': 'Number of cores used by the driver, only in cluster mode (Default: 1)', 'type': 'number',
                   'is_yarn': True}]
          )

          get_spark_api.return_value = Mock(
            create_session=Mock(
              return_value={'id': '1'}
            ),
            get_session=Mock(
              return_value={'state': 'idle', 'log': ''}
            )
          )
          # Case with user configuration. Expected 2 driverCores
          USE_DEFAULT_CONFIGURATION.get.return_value = True
          session = self.api.create_session(lang=lang, properties=properties)
          assert_equal(session['type'], 'pyspark')
          assert_equal(session['id'], '1')
          for p in session['properties']:
            if p['name'] == 'driverCores':
              cores = p['value']
          assert_equal(cores, 2)

          # Case without user configuration. Expected 1 driverCores
          USE_DEFAULT_CONFIGURATION.get.return_value = True
          DefaultConfiguration.objects.get_configuration_for_user.return_value = None
          session2 = self.api.create_session(lang=lang, properties=properties)
          assert_equal(session2['type'], 'pyspark')
          assert_equal(session2['id'], '1')
          for p in session2['properties']:
            if p['name'] == 'driverCores':
              cores = p['value']
          assert_equal(cores, 1)

          # Case with no user configuration. Expected 1 driverCores
          USE_DEFAULT_CONFIGURATION.get.return_value = False
          session3 = self.api.create_session(lang=lang, properties=properties)
          assert_equal(session3['type'], 'pyspark')
          assert_equal(session3['id'], '1')
          for p in session3['properties']:
            if p['name'] == 'driverCores':
              cores = p['value']
          assert_equal(cores, 1)

  def test_create_session_plain(self):
    lang = 'pyspark'
    properties = None

    with patch('notebook.connectors.spark_shell.get_spark_api') as get_spark_api:
      get_spark_api.return_value = Mock(
        create_session=Mock(
          return_value={'id': '1'}
        ),
        get_session=Mock(
          return_value={'state': 'idle', 'log': ''}
        )
      )

      session = self.api.create_session(lang=lang, properties=properties)

      assert_equal(session['type'], 'pyspark')
      assert_equal(session['id'], '1')

      files_properties = [prop for prop in session['properties'] if prop['name'] == 'files']
      assert_true(files_properties, session['properties'])
      assert_equal(files_properties[0]['value'], [], session['properties'])

  def test_get_jobs(self):
    local_jobs = [
      {'url': u'http://172.21.1.246:4040/jobs/job/?id=0', 'name': u'0'}
    ]
    jobs = self.api._get_standalone_jobs(LIVY_STANDALONE_LOG)
    assert_equal(jobs, local_jobs, jobs)

    yarn_jobs = [
      {'url': u'http://huetest-1.test.com:8088/proxy/application_1444070328046_0002/', 'name': u'application_1444070328046_0002'}
    ]
    jobs = self.api._get_yarn_jobs(LIVY_YARN_LOG)
    assert_equal(jobs, yarn_jobs, jobs)
Example #16
def setUp(self):
    self.user = "******"
    self.api = SparkApi(self.user)
Example #17
File: tests.py Project: ralic/hue
class TestSparkShellConnector(object):

  LIVY_STANDALONE_LOG = """
    Starting livy-repl on http://172.21.1.246:58449
    Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
    15/10/05 14:02:33 INFO SparkContext: Running Spark version 1.5.0
    15/10/05 14:02:33 INFO SecurityManager: Changing view acls to: huetest
    15/10/05 14:02:33 INFO SecurityManager: Changing modify acls to: huetest
    15/10/05 14:02:33 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(huetest); users with modify permissions: Set(huetest)
    15/10/05 14:02:33 INFO Slf4jLogger: Slf4jLogger started
    15/10/05 14:02:33 INFO Remoting: Starting remoting
    15/10/05 14:02:33 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://sparkDriver@172.21.1.246:58451]
    15/10/05 14:02:33 INFO Utils: Successfully started service 'sparkDriver' on port 58451.
    15/10/05 14:02:33 INFO SparkEnv: Registering MapOutputTracker
    15/10/05 14:02:33 INFO SparkEnv: Registering BlockManagerMaster
    15/10/05 14:02:33 INFO DiskBlockManager: Created local directory at /private/var/folders/7t/31vfhhq92_g628vh8q5pspbc0000gp/T/blockmgr-f63fdd28-6d86-4ae6-a91c-902fb0310fb4
    15/10/05 14:02:33 INFO MemoryStore: MemoryStore started with capacity 530.0 MB
    15/10/05 14:02:33 INFO HttpFileServer: HTTP File server directory is /private/var/folders/7t/31vfhhq92_g628vh8q5pspbc0000gp/T/spark-a0e35333-e2be-4b83-8a7e-3cb468270dc2/httpd-0235b01f-ee8b-40fd-96a9-de946b1a3426
    15/10/05 14:02:33 INFO HttpServer: Starting HTTP Server
    15/10/05 14:02:33 INFO Utils: Successfully started service 'HTTP file server' on port 58452.
    15/10/05 14:02:33 INFO SparkEnv: Registering OutputCommitCoordinator
    15/10/05 14:02:33 INFO Utils: Successfully started service 'SparkUI' on port 4040.
    15/10/05 14:02:33 INFO SparkUI: Started SparkUI at http://172.21.1.246:4040
    15/10/05 14:02:34 INFO SparkContext: Added JAR file:/Users/huetest/Dev/hue/apps/spark/java/livy-assembly/target/scala-2.10/livy-assembly-3.9.0-SNAPSHOT.jar at http://172.21.1.246:58452/jars/livy-assembly-3.9.0-SNAPSHOT.jar with timestamp 1444078954103
    15/10/05 14:02:34 WARN MetricsSystem: Using default name DAGScheduler for source because spark.app.id is not set.
    15/10/05 14:02:34 INFO Executor: Starting executor ID driver on host localhost
    15/10/05 14:02:34 INFO Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 58453.
    15/10/05 14:02:34 INFO NettyBlockTransferService: Server created on 58453
    15/10/05 14:02:34 INFO BlockManagerMaster: Trying to register BlockManager
    15/10/05 14:02:34 INFO BlockManagerMasterEndpoint: Registering block manager localhost:58453 with 530.0 MB RAM, BlockManagerId(driver, localhost, 58453)
    15/10/05 14:02:34 INFO BlockManagerMaster: Registered BlockManager
    15/10/05 14:02:36 INFO MemoryStore: ensureFreeSpace(130448) called with curMem=0, maxMem=555755765
    15/10/05 14:02:36 INFO MemoryStore: Block broadcast_0 stored as values in memory (estimated size 127.4 KB, free 529.9 MB)
    15/10/05 14:02:36 INFO MemoryStore: ensureFreeSpace(14276) called with curMem=130448, maxMem=555755765
    15/10/05 14:02:36 INFO MemoryStore: Block broadcast_0_piece0 stored as bytes in memory (estimated size 13.9 KB, free 529.9 MB)
    15/10/05 14:02:36 INFO BlockManagerInfo: Added broadcast_0_piece0 in memory on localhost:58453 (size: 13.9 KB, free: 530.0 MB)
    15/10/05 14:02:36 INFO SparkContext: Created broadcast 0 from textFile at NativeMethodAccessorImpl.java:-2
    15/10/05 14:02:36 INFO FileInputFormat: Total input paths to process : 1
    15/10/05 14:02:36 INFO SparkContext: Starting job: collect at <stdin>:1
    15/10/05 14:02:36 INFO DAGScheduler: Registering RDD 3 (reduceByKey at <stdin>:1)
    15/10/05 14:02:36 INFO DAGScheduler: Registering RDD 7 (combineByKey at <stdin>:3)
    15/10/05 14:02:36 INFO DAGScheduler: Got job 0 (collect at <stdin>:1) with 2 output partitions
    15/10/05 14:02:36 INFO DAGScheduler: Final stage: ResultStage 2(collect at <stdin>:1)
    15/10/05 14:02:36 INFO DAGScheduler: Parents of final stage: List(ShuffleMapStage 1)
    15/10/05 14:02:36 INFO DAGScheduler: Missing parents: List(ShuffleMapStage 1)
    15/10/05 14:02:36 INFO DAGScheduler: Submitting ShuffleMapStage 0 (PairwiseRDD[3] at reduceByKey at <stdin>:1), which has no missing parents
    15/10/05 14:02:36 INFO MemoryStore: ensureFreeSpace(8960) called with curMem=144724, maxMem=555755765
    15/10/05 14:02:36 INFO MemoryStore: Block broadcast_1 stored as values in memory (estimated size 8.8 KB, free 529.9 MB)
    15/10/05 14:02:36 INFO MemoryStore: ensureFreeSpace(5483) called with curMem=153684, maxMem=555755765
    15/10/05 14:02:36 INFO MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 5.4 KB, free 529.9 MB)
    15/10/05 14:02:36 INFO BlockManagerInfo: Added broadcast_1_piece0 in memory on localhost:58453 (size: 5.4 KB, free: 530.0 MB)
    15/10/05 14:02:36 INFO SparkContext: Created broadcast 1 from broadcast at DAGScheduler.scala:861
    15/10/05 14:02:36 INFO DAGScheduler: Submitting 2 missing tasks from ShuffleMapStage 0 (PairwiseRDD[3] at reduceByKey at <stdin>:1)
    15/10/05 14:02:36 INFO TaskSchedulerImpl: Adding task set 0.0 with 2 tasks
    15/10/05 14:02:36 INFO TaskSetManager: Starting task 0.0 in stage 0.0 (TID 0, localhost, PROCESS_LOCAL, 2266 bytes)
    15/10/05 14:02:36 INFO TaskSetManager: Starting task 1.0 in stage 0.0 (TID 1, localhost, PROCESS_LOCAL, 2266 bytes)
    15/10/05 14:02:36 INFO Executor: Running task 0.0 in stage 0.0 (TID 0)
    15/10/05 14:02:36 INFO Executor: Running task 1.0 in stage 0.0 (TID 1)
    15/10/05 14:02:36 INFO Executor: Fetching http://172.21.1.246:58452/jars/livy-assembly-3.9.0-SNAPSHOT.jar with timestamp 1444078954103
    15/10/05 14:02:36 INFO Utils: Fetching http://172.21.1.246:58452/jars/livy-assembly-3.9.0-SNAPSHOT.jar to /private/var/folders/7t/31vfhhq92_g628vh8q5pspbc0000gp/T/spark-a0e35333-e2be-4b83-8a7e-3cb468270dc2/userFiles-d0940846-b38a-4e4d-af07-8419b364d7ff/fetchFileTemp476551478197543813.tmp
    15/10/05 14:02:36 INFO Executor: Adding file:/private/var/folders/7t/31vfhhq92_g628vh8q5pspbc0000gp/T/spark-a0e35333-e2be-4b83-8a7e-3cb468270dc2/userFiles-d0940846-b38a-4e4d-af07-8419b364d7ff/livy-assembly-3.9.0-SNAPSHOT.jar to class loader
    15/10/05 14:02:36 INFO HadoopRDD: Input split: file:/Users/huetest/Downloads/babs_open_data_year_1/201402_babs_open_data/201402_trip_data.csv:0+8609511
    15/10/05 14:02:36 INFO HadoopRDD: Input split: file:/Users/huetest/Downloads/babs_open_data_year_1/201402_babs_open_data/201402_trip_data.csv:8609511+8609511
    15/10/05 14:02:36 INFO deprecation: mapred.tip.id is deprecated. Instead, use mapreduce.task.id
    15/10/05 14:02:36 INFO deprecation: mapred.task.id is deprecated. Instead, use mapreduce.task.attempt.id
    15/10/05 14:02:36 INFO deprecation: mapred.task.is.map is deprecated. Instead, use mapreduce.task.ismap
    15/10/05 14:02:36 INFO deprecation: mapred.task.partition is deprecated. Instead, use mapreduce.task.partition
    15/10/05 14:02:36 INFO deprecation: mapred.job.id is deprecated. Instead, use mapreduce.job.id
    15/10/05 14:02:37 INFO PythonRDD: Times: total = 727, boot = 229, init = 44, finish = 454
    15/10/05 14:02:37 INFO PythonRDD: Times: total = 730, boot = 226, init = 46, finish = 458
    15/10/05 14:02:37 INFO Executor: Finished task 1.0 in stage 0.0 (TID 1). 2318 bytes result sent to driver
    15/10/05 14:02:37 INFO Executor: Finished task 0.0 in stage 0.0 (TID 0). 2318 bytes result sent to driver
    15/10/05 14:02:37 INFO TaskSetManager: Finished task 1.0 in stage 0.0 (TID 1) in 950 ms on localhost (1/2)
    15/10/05 14:02:37 INFO TaskSetManager: Finished task 0.0 in stage 0.0 (TID 0) in 962 ms on localhost (2/2)
    15/10/05 14:02:37 INFO TaskSchedulerImpl: Removed TaskSet 0.0, whose tasks have all completed, from pool
    15/10/05 14:02:37 INFO DAGScheduler: ShuffleMapStage 0 (reduceByKey at <stdin>:1) finished in 0.973 s
    15/10/05 14:02:37 INFO DAGScheduler: looking for newly runnable stages
  """
  LIVY_YARN_LOG = """
    15/10/05 13:51:21 INFO client.RMProxy: Connecting to ResourceManager at huetest-1.test.com/175.18.213.12:8032
    15/10/05 13:51:21 INFO yarn.Client: Requesting a new application from cluster with 3 NodeManagers
    15/10/05 13:51:21 INFO yarn.Client: Verifying our application has not requested more than the maximum memory capability of the cluster (2048 MB per container)
    15/10/05 13:51:21 INFO yarn.Client: Will allocate AM container, with 1408 MB memory including 384 MB overhead
    15/10/05 13:51:21 INFO yarn.Client: Setting up container launch context for our AM
    15/10/05 13:51:21 INFO yarn.Client: Setting up the launch environment for our AM container
    15/10/05 13:51:21 INFO yarn.Client: Preparing resources for our AM container
    15/10/05 13:51:21 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
    15/10/05 13:51:21 INFO yarn.Client: Uploading resource file:/Users/huetest/Dev/spark-1.5.0-bin-hadoop2.6/lib/spark-assembly-1.5.0-hadoop2.6.0.jar -> hdfs://huetest-1.vpc.cloudera.com:8020/user/huetest/.sparkStaging/application_1444070328046_0002/spark-assembly-1.5.0-hadoop2.6.0.jar
    15/10/05 13:52:00 INFO yarn.Client: Uploading resource file:/Users/huetest/Dev/hue/apps/spark/java/livy-assembly/target/scala-2.10/livy-assembly-3.9.0-SNAPSHOT.jar -> hdfs://huetest-1.vpc.cloudera.com:8020/user/huetest/.sparkStaging/application_1444070328046_0002/livy-assembly-3.9.0-SNAPSHOT.jar
    15/10/05 13:52:09 INFO yarn.Client: Uploading resource file:/Users/huetest/Dev/spark-1.5.0-bin-hadoop2.6/python/lib/pyspark.zip -> hdfs://huetest-1.vpc.cloudera.com:8020/user/huetest/.sparkStaging/application_1444070328046_0002/pyspark.zip
    15/10/05 13:52:09 INFO yarn.Client: Uploading resource file:/Users/huetest/Dev/spark-1.5.0-bin-hadoop2.6/python/lib/py4j-0.8.2.1-src.zip -> hdfs://huetest-1.vpc.cloudera.com:8020/user/huetest/.sparkStaging/application_1444070328046_0002/py4j-0.8.2.1-src.zip
    15/10/05 13:52:10 INFO yarn.Client: Uploading resource file:/private/var/folders/7t/31vfhhq92_g628vh8q5pspbc0000gp/T/spark-3bde33db-374c-4abe-a4af-704bd5dc09d2/__spark_conf__4420686202746650998.zip -> hdfs://huetest-1.vpc.cloudera.com:8020/user/huetest/.sparkStaging/application_1444070328046_0002/__spark_conf__4420686202746650998.zip
    15/10/05 13:52:10 INFO spark.SecurityManager: Changing view acls to: huetest
    15/10/05 13:52:10 INFO spark.SecurityManager: Changing modify acls to: huetest
    15/10/05 13:52:10 INFO spark.SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(huetest); users with modify permissions: Set(huetest)
    15/10/05 13:52:10 INFO yarn.Client: Submitting application 2 to ResourceManager
    15/10/05 13:52:10 INFO impl.YarnClientImpl: Submitted application application_1444070328046_0002
    15/10/05 13:52:11 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED)
    15/10/05 13:52:11 INFO yarn.Client:
         client token: N/A
         diagnostics: N/A
         ApplicationMaster host: N/A
         ApplicationMaster RPC port: -1
         queue: root.huetest
         start time: 1444078329419
         final status: UNDEFINED
         tracking URL: http://huetest-1.test.com:8088/proxy/application_1444070328046_0002/
         user: huetest
    15/10/05 13:52:12 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED)
    15/10/05 13:52:13 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED)
    15/10/05 13:52:14 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED)
    15/10/05 13:52:16 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED)
    15/10/05 13:52:17 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED)
    15/10/05 13:52:18 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED)
    15/10/05 13:52:19 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED)
    15/10/05 13:52:20 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED)
    15/10/05 13:52:21 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED)
    15/10/05 13:52:22 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED)
    15/10/05 13:52:23 INFO yarn.Client: Application report for application_1444070328046_0002 (state: RUNNING)
    15/10/05 13:52:23 INFO yarn.Client:
         client token: N/A
         diagnostics: N/A
         ApplicationMaster host: 175.18.213.12
         ApplicationMaster RPC port: 0
         queue: root.huetest
         start time: 1444078329419
         final status: UNDEFINED
         tracking URL: http://huetest-1.test.com:8088/proxy/application_1444070328046_0002/
         user: huetest
    15/10/05 13:52:24 INFO yarn.Client: Application report for application_1444070328046_0002 (state: RUNNING)
  """

  def setUp(self):
    self.user = '******'
    self.api = SparkApi(self.user)


  def test_get_jobs(self):
    local_jobs = [
      {'url': u'http://172.21.1.246:4040/jobs/job/?id=0', 'name': u'0'}
    ]
    jobs = self.api._get_standalone_jobs(self.LIVY_STANDALONE_LOG)
    assert_equal(jobs, local_jobs, jobs)

    yarn_jobs = [
      {'url': u'http://huetest-1.test.com:8088/proxy/application_1444070328046_0002/', 'name': u'application_1444070328046_0002'}
    ]
    jobs = self.api._get_yarn_jobs(self.LIVY_YARN_LOG)
    assert_equal(jobs, yarn_jobs, jobs)
Example #18
class TestSparkApi(object):
    def setUp(self):
        self.user = '******'
        self.interpreter = {
            'name': 'livy',
            'options': {
                'api_url': 'http://gethue.com:8998'
            },
        }
        self.api = SparkApi(self.user, self.interpreter)

    def test_get_api(self):
        lang = 'pyspark'
        properties = None

        # with patch('notebook.connectors.spark_shell.get_spark_api') as get_spark_api:
        spark_api = self.api.get_api()
        assert_equal(spark_api.__class__.__name__, 'LivyClient')

    def test_get_livy_props_method(self):
        test_properties = [{
            "name": "files",
            "value": 'file_a,file_b,file_c',
        }]
        props = self.api.get_livy_props('scala', test_properties)
        assert_equal(props['files'], ['file_a', 'file_b', 'file_c'])

    def test_create_session_with_config(self):
        lang = 'pyspark'
        properties = None
        session_key = self.api._get_session_key()

        with patch('notebook.connectors.spark_shell.get_spark_api'
                   ) as get_spark_api:
            with patch('notebook.connectors.spark_shell.DefaultConfiguration'
                       ) as DefaultConfiguration:
                with patch(
                        'notebook.connectors.spark_shell.USE_DEFAULT_CONFIGURATION'
                ) as USE_DEFAULT_CONFIGURATION:
                    DefaultConfiguration.objects.get_configuration_for_user.return_value = Mock(
                        properties_list=[{
                            'multiple': False,
                            'name': 'driverCores',
                            'defaultValue': 1,
                            'value': 2,
                            'nice_name': 'Driver Cores',
                            'help_text':
                            'Number of cores used by the driver, only in cluster mode (Default: 1)',
                            'type': 'number',
                            'is_yarn': True
                        }])

                    get_spark_api.return_value = Mock(
                        create_session=Mock(return_value={'id': '1'}),
                        get_session=Mock(return_value={
                            'state': 'idle',
                            'log': ''
                        }))
                    # Case with user configuration. Expected 2 driverCores
                    USE_DEFAULT_CONFIGURATION.get.return_value = True
                    session = self.api.create_session(lang=lang,
                                                      properties=properties)

                    assert_equal(session['type'], 'pyspark')
                    assert_equal(session['id'], '1')

                    for p in session['properties']:
                        if p['name'] == 'driverCores':
                            cores = p['value']
                    assert_equal(cores, 2)

                    if SESSIONS.get(session_key):
                        del SESSIONS[session_key]

                    # Case without user configuration. Expected 1 driverCores
                    USE_DEFAULT_CONFIGURATION.get.return_value = True
                    DefaultConfiguration.objects.get_configuration_for_user.return_value = None
                    session2 = self.api.create_session(lang=lang,
                                                       properties=properties)

                    assert_equal(session2['type'], 'pyspark')
                    assert_equal(session2['id'], '1')

                    for p in session2['properties']:
                        if p['name'] == 'driverCores':
                            cores = p['value']
                    assert_equal(cores, 1)

                    if SESSIONS.get(session_key):
                        del SESSIONS[session_key]

                    # Case with no user configuration. Expected 1 driverCores
                    USE_DEFAULT_CONFIGURATION.get.return_value = False
                    session3 = self.api.create_session(lang=lang,
                                                       properties=properties)

                    assert_equal(session3['type'], 'pyspark')
                    assert_equal(session3['id'], '1')

                    for p in session3['properties']:
                        if p['name'] == 'driverCores':
                            cores = p['value']
                    assert_equal(cores, 1)

                    if SESSIONS.get(session_key):
                        del SESSIONS[session_key]

    def test_create_session_plain(self):
        lang = 'pyspark'
        properties = None
        session_key = self.api._get_session_key()

        with patch('notebook.connectors.spark_shell.get_spark_api'
                   ) as get_spark_api:
            get_spark_api.return_value = Mock(
                create_session=Mock(return_value={'id': '1'}),
                get_session=Mock(return_value={
                    'state': 'idle',
                    'log': ''
                }))

            session = self.api.create_session(lang=lang, properties=properties)

            assert_equal(session['type'], 'pyspark')
            assert_equal(session['id'], '1')

            files_properties = [
                prop for prop in session['properties']
                if prop['name'] == 'files'
            ]
            assert_true(files_properties, session['properties'])
            assert_equal(files_properties[0]['value'], [],
                         session['properties'])

            if SESSIONS.get(session_key):
                del SESSIONS[session_key]

    def test_execute(self):
        with patch('notebook.connectors.spark_shell._get_snippet_session'
                   ) as _get_snippet_session:
            with patch('notebook.connectors.spark_shell.get_spark_api'
                       ) as get_spark_api:
                notebook = Mock()
                snippet = {'statement': 'select * from test_table'}
                _get_snippet_session.return_value = {'id': '1'}

                get_spark_api.return_value = Mock(submit_statement=Mock(
                    return_value={'id': 'test_id'}))

                response = self.api.execute(notebook, snippet)
                assert_equal(response['id'], 'test_id')

                get_spark_api.return_value = Mock(submit_statement=Mock())
                assert_raises(Exception, self.api.execute, notebook, snippet)

    def test_check_status(self):
        with patch('notebook.connectors.spark_shell._get_snippet_session'
                   ) as _get_snippet_session:
            with patch('notebook.connectors.spark_shell.get_spark_api'
                       ) as get_spark_api:
                notebook = Mock()
                snippet = {'result': {'handle': {'id': {'test_id'}}}}
                _get_snippet_session.return_value = {'id': '1'}

                get_spark_api.return_value = Mock(fetch_data=Mock(
                    return_value={'state': 'test_state'}))

                response = self.api.check_status(notebook, snippet)
                assert_equal(response['status'], 'test_state')

                get_spark_api.return_value = Mock(submit_statement=Mock())
                assert_raises(Exception, self.api.check_status, notebook,
                              snippet)

    def test_get_sample_data(self):
        snippet = Mock()
        self.api._execute = Mock(return_value='test_value')
        self.api._check_status_and_fetch_result = Mock(return_value={
            'data': 'test_data',
            'meta': 'test_meta'
        })

        response = self.api.get_sample_data(snippet, 'test_db', 'test_table',
                                            'test_column')

        assert_equal(response['rows'], 'test_data')
        assert_equal(response['full_headers'], 'test_meta')

    def test_get_select_query(self):
        # With operation as 'hello'
        response = self.api._get_select_query('test_db', 'test_table',
                                              'test_column', 'hello')
        assert_equal(response, "SELECT 'Hello World!'")

        # Without column name
        response = self.api._get_select_query('test_db', 'test_table')
        assert_equal(response,
                     'SELECT *\nFROM test_db.test_table\nLIMIT 100\n')

        # With some column name
        response = self.api._get_select_query('test_db', 'test_table',
                                              'test_column')
        assert_equal(
            response,
            'SELECT test_column\nFROM test_db.test_table\nLIMIT 100\n')

    def test_get_jobs(self):
        local_jobs = [{
            'url': u'http://172.21.1.246:4040/jobs/job/?id=0',
            'name': u'0'
        }]
        jobs = self.api._get_standalone_jobs(LIVY_STANDALONE_LOG)
        assert_equal(jobs, local_jobs, jobs)

        yarn_jobs = [{
            'url':
            u'http://huetest-1.test.com:8088/proxy/application_1444070328046_0002/',
            'name': u'application_1444070328046_0002'
        }]
        jobs = self.api._get_yarn_jobs(LIVY_YARN_LOG)
        assert_equal(jobs, yarn_jobs, jobs)
Example #19
class TestSparkShellConnector(object):

    LIVY_STANDALONE_LOG = """
    Starting livy-repl on http://172.21.1.246:58449
    Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
    15/10/05 14:02:33 INFO SparkContext: Running Spark version 1.5.0
    15/10/05 14:02:33 INFO SecurityManager: Changing view acls to: huetest
    15/10/05 14:02:33 INFO SecurityManager: Changing modify acls to: huetest
    15/10/05 14:02:33 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(huetest); users with modify permissions: Set(huetest)
    15/10/05 14:02:33 INFO Slf4jLogger: Slf4jLogger started
    15/10/05 14:02:33 INFO Remoting: Starting remoting
    15/10/05 14:02:33 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://sparkDriver@172.21.1.246:58451]
    15/10/05 14:02:33 INFO Utils: Successfully started service 'sparkDriver' on port 58451.
    15/10/05 14:02:33 INFO SparkEnv: Registering MapOutputTracker
    15/10/05 14:02:33 INFO SparkEnv: Registering BlockManagerMaster
    15/10/05 14:02:33 INFO DiskBlockManager: Created local directory at /private/var/folders/7t/31vfhhq92_g628vh8q5pspbc0000gp/T/blockmgr-f63fdd28-6d86-4ae6-a91c-902fb0310fb4
    15/10/05 14:02:33 INFO MemoryStore: MemoryStore started with capacity 530.0 MB
    15/10/05 14:02:33 INFO HttpFileServer: HTTP File server directory is /private/var/folders/7t/31vfhhq92_g628vh8q5pspbc0000gp/T/spark-a0e35333-e2be-4b83-8a7e-3cb468270dc2/httpd-0235b01f-ee8b-40fd-96a9-de946b1a3426
    15/10/05 14:02:33 INFO HttpServer: Starting HTTP Server
    15/10/05 14:02:33 INFO Utils: Successfully started service 'HTTP file server' on port 58452.
    15/10/05 14:02:33 INFO SparkEnv: Registering OutputCommitCoordinator
    15/10/05 14:02:33 INFO Utils: Successfully started service 'SparkUI' on port 4040.
    15/10/05 14:02:33 INFO SparkUI: Started SparkUI at http://172.21.1.246:4040
    15/10/05 14:02:34 INFO SparkContext: Added JAR file:/Users/huetest/Dev/hue/apps/spark/java/livy-assembly/target/scala-2.10/livy-assembly-3.9.0-SNAPSHOT.jar at http://172.21.1.246:58452/jars/livy-assembly-3.9.0-SNAPSHOT.jar with timestamp 1444078954103
    15/10/05 14:02:34 WARN MetricsSystem: Using default name DAGScheduler for source because spark.app.id is not set.
    15/10/05 14:02:34 INFO Executor: Starting executor ID driver on host localhost
    15/10/05 14:02:34 INFO Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 58453.
    15/10/05 14:02:34 INFO NettyBlockTransferService: Server created on 58453
    15/10/05 14:02:34 INFO BlockManagerMaster: Trying to register BlockManager
    15/10/05 14:02:34 INFO BlockManagerMasterEndpoint: Registering block manager localhost:58453 with 530.0 MB RAM, BlockManagerId(driver, localhost, 58453)
    15/10/05 14:02:34 INFO BlockManagerMaster: Registered BlockManager
    15/10/05 14:02:36 INFO MemoryStore: ensureFreeSpace(130448) called with curMem=0, maxMem=555755765
    15/10/05 14:02:36 INFO MemoryStore: Block broadcast_0 stored as values in memory (estimated size 127.4 KB, free 529.9 MB)
    15/10/05 14:02:36 INFO MemoryStore: ensureFreeSpace(14276) called with curMem=130448, maxMem=555755765
    15/10/05 14:02:36 INFO MemoryStore: Block broadcast_0_piece0 stored as bytes in memory (estimated size 13.9 KB, free 529.9 MB)
    15/10/05 14:02:36 INFO BlockManagerInfo: Added broadcast_0_piece0 in memory on localhost:58453 (size: 13.9 KB, free: 530.0 MB)
    15/10/05 14:02:36 INFO SparkContext: Created broadcast 0 from textFile at NativeMethodAccessorImpl.java:-2
    15/10/05 14:02:36 INFO FileInputFormat: Total input paths to process : 1
    15/10/05 14:02:36 INFO SparkContext: Starting job: collect at <stdin>:1
    15/10/05 14:02:36 INFO DAGScheduler: Registering RDD 3 (reduceByKey at <stdin>:1)
    15/10/05 14:02:36 INFO DAGScheduler: Registering RDD 7 (combineByKey at <stdin>:3)
    15/10/05 14:02:36 INFO DAGScheduler: Got job 0 (collect at <stdin>:1) with 2 output partitions
    15/10/05 14:02:36 INFO DAGScheduler: Final stage: ResultStage 2(collect at <stdin>:1)
    15/10/05 14:02:36 INFO DAGScheduler: Parents of final stage: List(ShuffleMapStage 1)
    15/10/05 14:02:36 INFO DAGScheduler: Missing parents: List(ShuffleMapStage 1)
    15/10/05 14:02:36 INFO DAGScheduler: Submitting ShuffleMapStage 0 (PairwiseRDD[3] at reduceByKey at <stdin>:1), which has no missing parents
    15/10/05 14:02:36 INFO MemoryStore: ensureFreeSpace(8960) called with curMem=144724, maxMem=555755765
    15/10/05 14:02:36 INFO MemoryStore: Block broadcast_1 stored as values in memory (estimated size 8.8 KB, free 529.9 MB)
    15/10/05 14:02:36 INFO MemoryStore: ensureFreeSpace(5483) called with curMem=153684, maxMem=555755765
    15/10/05 14:02:36 INFO MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 5.4 KB, free 529.9 MB)
    15/10/05 14:02:36 INFO BlockManagerInfo: Added broadcast_1_piece0 in memory on localhost:58453 (size: 5.4 KB, free: 530.0 MB)
    15/10/05 14:02:36 INFO SparkContext: Created broadcast 1 from broadcast at DAGScheduler.scala:861
    15/10/05 14:02:36 INFO DAGScheduler: Submitting 2 missing tasks from ShuffleMapStage 0 (PairwiseRDD[3] at reduceByKey at <stdin>:1)
    15/10/05 14:02:36 INFO TaskSchedulerImpl: Adding task set 0.0 with 2 tasks
    15/10/05 14:02:36 INFO TaskSetManager: Starting task 0.0 in stage 0.0 (TID 0, localhost, PROCESS_LOCAL, 2266 bytes)
    15/10/05 14:02:36 INFO TaskSetManager: Starting task 1.0 in stage 0.0 (TID 1, localhost, PROCESS_LOCAL, 2266 bytes)
    15/10/05 14:02:36 INFO Executor: Running task 0.0 in stage 0.0 (TID 0)
    15/10/05 14:02:36 INFO Executor: Running task 1.0 in stage 0.0 (TID 1)
    15/10/05 14:02:36 INFO Executor: Fetching http://172.21.1.246:58452/jars/livy-assembly-3.9.0-SNAPSHOT.jar with timestamp 1444078954103
    15/10/05 14:02:36 INFO Utils: Fetching http://172.21.1.246:58452/jars/livy-assembly-3.9.0-SNAPSHOT.jar to /private/var/folders/7t/31vfhhq92_g628vh8q5pspbc0000gp/T/spark-a0e35333-e2be-4b83-8a7e-3cb468270dc2/userFiles-d0940846-b38a-4e4d-af07-8419b364d7ff/fetchFileTemp476551478197543813.tmp
    15/10/05 14:02:36 INFO Executor: Adding file:/private/var/folders/7t/31vfhhq92_g628vh8q5pspbc0000gp/T/spark-a0e35333-e2be-4b83-8a7e-3cb468270dc2/userFiles-d0940846-b38a-4e4d-af07-8419b364d7ff/livy-assembly-3.9.0-SNAPSHOT.jar to class loader
    15/10/05 14:02:36 INFO HadoopRDD: Input split: file:/Users/huetest/Downloads/babs_open_data_year_1/201402_babs_open_data/201402_trip_data.csv:0+8609511
    15/10/05 14:02:36 INFO HadoopRDD: Input split: file:/Users/huetest/Downloads/babs_open_data_year_1/201402_babs_open_data/201402_trip_data.csv:8609511+8609511
    15/10/05 14:02:36 INFO deprecation: mapred.tip.id is deprecated. Instead, use mapreduce.task.id
    15/10/05 14:02:36 INFO deprecation: mapred.task.id is deprecated. Instead, use mapreduce.task.attempt.id
    15/10/05 14:02:36 INFO deprecation: mapred.task.is.map is deprecated. Instead, use mapreduce.task.ismap
    15/10/05 14:02:36 INFO deprecation: mapred.task.partition is deprecated. Instead, use mapreduce.task.partition
    15/10/05 14:02:36 INFO deprecation: mapred.job.id is deprecated. Instead, use mapreduce.job.id
    15/10/05 14:02:37 INFO PythonRDD: Times: total = 727, boot = 229, init = 44, finish = 454
    15/10/05 14:02:37 INFO PythonRDD: Times: total = 730, boot = 226, init = 46, finish = 458
    15/10/05 14:02:37 INFO Executor: Finished task 1.0 in stage 0.0 (TID 1). 2318 bytes result sent to driver
    15/10/05 14:02:37 INFO Executor: Finished task 0.0 in stage 0.0 (TID 0). 2318 bytes result sent to driver
    15/10/05 14:02:37 INFO TaskSetManager: Finished task 1.0 in stage 0.0 (TID 1) in 950 ms on localhost (1/2)
    15/10/05 14:02:37 INFO TaskSetManager: Finished task 0.0 in stage 0.0 (TID 0) in 962 ms on localhost (2/2)
    15/10/05 14:02:37 INFO TaskSchedulerImpl: Removed TaskSet 0.0, whose tasks have all completed, from pool
    15/10/05 14:02:37 INFO DAGScheduler: ShuffleMapStage 0 (reduceByKey at <stdin>:1) finished in 0.973 s
    15/10/05 14:02:37 INFO DAGScheduler: looking for newly runnable stages
  """
    LIVY_YARN_LOG = """
    15/10/05 13:51:21 INFO client.RMProxy: Connecting to ResourceManager at huetest-1.test.com/175.18.213.12:8032
    15/10/05 13:51:21 INFO yarn.Client: Requesting a new application from cluster with 3 NodeManagers
    15/10/05 13:51:21 INFO yarn.Client: Verifying our application has not requested more than the maximum memory capability of the cluster (2048 MB per container)
    15/10/05 13:51:21 INFO yarn.Client: Will allocate AM container, with 1408 MB memory including 384 MB overhead
    15/10/05 13:51:21 INFO yarn.Client: Setting up container launch context for our AM
    15/10/05 13:51:21 INFO yarn.Client: Setting up the launch environment for our AM container
    15/10/05 13:51:21 INFO yarn.Client: Preparing resources for our AM container
    15/10/05 13:51:21 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
    15/10/05 13:51:21 INFO yarn.Client: Uploading resource file:/Users/huetest/Dev/spark-1.5.0-bin-hadoop2.6/lib/spark-assembly-1.5.0-hadoop2.6.0.jar -> hdfs://huetest-1.vpc.cloudera.com:8020/user/huetest/.sparkStaging/application_1444070328046_0002/spark-assembly-1.5.0-hadoop2.6.0.jar
    15/10/05 13:52:00 INFO yarn.Client: Uploading resource file:/Users/huetest/Dev/hue/apps/spark/java/livy-assembly/target/scala-2.10/livy-assembly-3.9.0-SNAPSHOT.jar -> hdfs://huetest-1.vpc.cloudera.com:8020/user/huetest/.sparkStaging/application_1444070328046_0002/livy-assembly-3.9.0-SNAPSHOT.jar
    15/10/05 13:52:09 INFO yarn.Client: Uploading resource file:/Users/huetest/Dev/spark-1.5.0-bin-hadoop2.6/python/lib/pyspark.zip -> hdfs://huetest-1.vpc.cloudera.com:8020/user/huetest/.sparkStaging/application_1444070328046_0002/pyspark.zip
    15/10/05 13:52:09 INFO yarn.Client: Uploading resource file:/Users/huetest/Dev/spark-1.5.0-bin-hadoop2.6/python/lib/py4j-0.8.2.1-src.zip -> hdfs://huetest-1.vpc.cloudera.com:8020/user/huetest/.sparkStaging/application_1444070328046_0002/py4j-0.8.2.1-src.zip
    15/10/05 13:52:10 INFO yarn.Client: Uploading resource file:/private/var/folders/7t/31vfhhq92_g628vh8q5pspbc0000gp/T/spark-3bde33db-374c-4abe-a4af-704bd5dc09d2/__spark_conf__4420686202746650998.zip -> hdfs://huetest-1.vpc.cloudera.com:8020/user/huetest/.sparkStaging/application_1444070328046_0002/__spark_conf__4420686202746650998.zip
    15/10/05 13:52:10 INFO spark.SecurityManager: Changing view acls to: huetest
    15/10/05 13:52:10 INFO spark.SecurityManager: Changing modify acls to: huetest
    15/10/05 13:52:10 INFO spark.SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(huetest); users with modify permissions: Set(huetest)
    15/10/05 13:52:10 INFO yarn.Client: Submitting application 2 to ResourceManager
    15/10/05 13:52:10 INFO impl.YarnClientImpl: Submitted application application_1444070328046_0002
    15/10/05 13:52:11 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED)
    15/10/05 13:52:11 INFO yarn.Client:
         client token: N/A
         diagnostics: N/A
         ApplicationMaster host: N/A
         ApplicationMaster RPC port: -1
         queue: root.huetest
         start time: 1444078329419
         final status: UNDEFINED
         tracking URL: http://huetest-1.test.com:8088/proxy/application_1444070328046_0002/
         user: huetest
    15/10/05 13:52:12 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED)
    15/10/05 13:52:13 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED)
    15/10/05 13:52:14 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED)
    15/10/05 13:52:16 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED)
    15/10/05 13:52:17 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED)
    15/10/05 13:52:18 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED)
    15/10/05 13:52:19 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED)
    15/10/05 13:52:20 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED)
    15/10/05 13:52:21 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED)
    15/10/05 13:52:22 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED)
    15/10/05 13:52:23 INFO yarn.Client: Application report for application_1444070328046_0002 (state: RUNNING)
    15/10/05 13:52:23 INFO yarn.Client:
         client token: N/A
         diagnostics: N/A
         ApplicationMaster host: 175.18.213.12
         ApplicationMaster RPC port: 0
         queue: root.huetest
         start time: 1444078329419
         final status: UNDEFINED
         tracking URL: http://huetest-1.test.com:8088/proxy/application_1444070328046_0002/
         user: huetest
    15/10/05 13:52:24 INFO yarn.Client: Application report for application_1444070328046_0002 (state: RUNNING)
  """

    def setUp(self):
        self.user = '******'
        self.api = SparkApi(self.user)

    def test_get_jobs(self):
        local_jobs = [{
            'url': u'http://172.21.1.246:4040/jobs/job/?id=0',
            'name': u'0'
        }]
        jobs = self.api._get_standalone_jobs(self.LIVY_STANDALONE_LOG)
        assert_equal(jobs, local_jobs, jobs)

        yarn_jobs = [{
            'url':
            u'http://huetest-1.test.com:8088/proxy/application_1444070328046_0002/',
            'name': u'application_1444070328046_0002'
        }]
        jobs = self.api._get_yarn_jobs(self.LIVY_YARN_LOG)
        assert_equal(jobs, yarn_jobs, jobs)
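The two assertions above only require that a job identifier and its URL can be recovered from the raw Livy logs. As a rough, self-contained sketch of that kind of parsing (the helper name and regexes below are assumptions for illustration, not the connector's actual _get_yarn_jobs implementation):

import re

def extract_yarn_jobs(log):
  # Hypothetical helper, not SparkApi._get_yarn_jobs itself: pair each submitted
  # YARN application id with the tracking URL reported by yarn.Client in the log.
  app_ids = re.findall(r'Submitted application (application_\d+_\d+)', log)
  url_match = re.search(r'tracking URL: (\S+)', log)
  url = url_match.group(1) if url_match else None
  return [{'name': app_id, 'url': url} for app_id in app_ids]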
Example #20
0
File: tests.py Project: ralic/hue
 def setUp(self):
   self.user = '******'
   self.api = SparkApi(self.user)
Example #21
0
class TestSparkApi(object):

  def setUp(self):
    self.client = make_logged_in_client(username="******", groupname="default", recreate=True, is_superuser=False)
    self.user = User.objects.get(username="******")

    self.interpreter = {
        'name': 'livy',
        'options': {
          'api_url': 'http://gethue.com:8998'
        },
      }
    self.api = SparkApi(self.user, self.interpreter)


  def test_get_api(self):
    spark_api = self.api.get_api()
    assert_equal(spark_api.__class__.__name__, 'LivyClient')


  def test_get_livy_props_method(self):
    test_properties = [{
        "name": "files",
        "value": 'file_a,file_b,file_c',
      }]
    props = self.api.get_livy_props('scala', test_properties)
    assert_equal(props['files'], ['file_a', 'file_b', 'file_c'])


  def test_create_session_with_config(self):
    lang = 'pyspark'
    properties = None
    session_key = self.api._get_session_key()

    with patch('notebook.connectors.spark_shell.get_spark_api') as get_spark_api:
      with patch('notebook.connectors.spark_shell.DefaultConfiguration') as DefaultConfiguration:
        with patch('notebook.connectors.spark_shell.USE_DEFAULT_CONFIGURATION') as USE_DEFAULT_CONFIGURATION:
          DefaultConfiguration.objects.get_configuration_for_user.return_value = Mock(
                properties_list=[
                  {'multiple': False, 'name': 'driverCores', 'defaultValue': 1, 'value': 2, 'nice_name': 'Driver Cores',
                   'help_text': 'Number of cores used by the driver, only in cluster mode (Default: 1)', 'type': 'number',
                   'is_yarn': True}]
          )

          get_spark_api.return_value = Mock(
            create_session=Mock(
              return_value={'id': '1'}
            ),
            get_session=Mock(
              return_value={'state': 'idle', 'log': ''}
            )
          )
          # Case with a saved user configuration: expect driverCores == 2
          USE_DEFAULT_CONFIGURATION.get.return_value = True
          session = self.api.create_session(lang=lang, properties=properties)

          assert_equal(session['type'], 'pyspark')
          assert_equal(session['id'], '1')

          for p in session['properties']:
            if p['name'] == 'driverCores':
              cores = p['value']
          assert_equal(cores, 2)

          if self.api._get_session_info_from_user():
            self.api._remove_session_info_from_user()

          # Case with default configuration enabled but no saved user configuration: expect driverCores == 1
          USE_DEFAULT_CONFIGURATION.get.return_value = True
          DefaultConfiguration.objects.get_configuration_for_user.return_value = None
          session2 = self.api.create_session(lang=lang, properties=properties)

          assert_equal(session2['type'], 'pyspark')
          assert_equal(session2['id'], '1')

          for p in session2['properties']:
            if p['name'] == 'driverCores':
              cores = p['value']
          assert_equal(cores, 1)

          # Case with default configuration disabled: expect driverCores == 1
          USE_DEFAULT_CONFIGURATION.get.return_value = False
          session3 = self.api.create_session(lang=lang, properties=properties)

          assert_equal(session3['type'], 'pyspark')
          assert_equal(session3['id'], '1')

          for p in session3['properties']:
            if p['name'] == 'driverCores':
              cores = p['value']
          assert_equal(cores, 1)


  def test_create_session_plain(self):
    lang = 'pyspark'
    properties = None
    session_key = self.api._get_session_key()

    with patch('notebook.connectors.spark_shell.get_spark_api') as get_spark_api:
      get_spark_api.return_value = Mock(
        create_session=Mock(
          return_value={'id': '1'}
        ),
        get_session=Mock(
          return_value={'state': 'idle', 'log': ''}
        )
      )

      session = self.api.create_session(lang=lang, properties=properties)

      assert_equal(session['type'], 'pyspark')
      assert_equal(session['id'], '1')

      files_properties = [prop for prop in session['properties'] if prop['name'] == 'files']
      assert_true(files_properties, session['properties'])
      assert_equal(files_properties[0]['value'], [], session['properties'])


  def test_execute(self):
    with patch('notebook.connectors.spark_shell._get_snippet_session') as _get_snippet_session:
      with patch('notebook.connectors.spark_shell.get_spark_api') as get_spark_api:
        notebook = Mock()
        snippet = {'statement': 'select * from test_table'}
        _get_snippet_session.return_value = {'id': '1'}

        get_spark_api.return_value = Mock(
          submit_statement=Mock(
            return_value={'id': 'test_id'}
          )
        )
        self.api._check_session = Mock(return_value={'id': '1'})

        response = self.api.execute(notebook, snippet)
        assert_equal(response['id'], 'test_id')

        get_spark_api.return_value = Mock(
          submit_statement=Mock()
        )
        assert_raises(Exception, self.api.execute, notebook, snippet)


  def test_check_status(self):
    with patch('notebook.connectors.spark_shell._get_snippet_session') as _get_snippet_session:
      with patch('notebook.connectors.spark_shell.get_spark_api') as get_spark_api:
        notebook = Mock()
        snippet = {
          'result': {
            'handle': {
              'id': 'test_id'
            }
          }
        }
        _get_snippet_session.return_value = {'id': '1'}

        get_spark_api.return_value = Mock(
          fetch_data=Mock(
            return_value={'state': 'test_state'}
          )
        )
        self.api._handle_session_health_check = Mock(return_value={'id': '1'})

        response = self.api.check_status(notebook, snippet)
        assert_equal(response['status'], 'test_state')

        get_spark_api.return_value = Mock(
          submit_statement=Mock()
        )
        assert_raises(Exception, self.api.check_status, notebook, snippet)
  

  def test_get_sample_data(self):
    snippet = Mock()
    self.api._execute = Mock(
      return_value='test_value'
    )
    self.api.create_session = Mock(
      return_value={
        'id': 'test_id'
      }
    )
    self.api._check_status_and_fetch_result = Mock(
      return_value={
        'data': 'test_data',
        'meta': 'test_meta'
      }
    )

    # When the table is transactional
    self.api.describe_table = Mock(
      return_value={
        'stats': [{
          'data_type': 'transactional',
          'col_name': 'true',
          'comment': ''
        }]
      }
    )
    response = self.api.get_sample_data(snippet, 'test_db', 'test_table', 'test_column')

    assert_equal(response['rows'], [])
    assert_equal(response['full_headers'], [])

    # When the table is not transactional
    self.api.describe_table = Mock(
      return_value={
        'stats': []  # No transactional details are present in the describe response
      }
    )
    response = self.api.get_sample_data(snippet, 'test_db', 'test_table', 'test_column')

    assert_equal(response['rows'], 'test_data')
    assert_equal(response['full_headers'], 'test_meta')
  

  def test_get_select_query(self):
    # With operation 'hello'
    response = self.api._get_select_query('test_db', 'test_table', 'test_column', 'hello')
    assert_equal(response, "SELECT 'Hello World!'")

    # Without column name
    response = self.api._get_select_query('test_db', 'test_table')
    assert_equal(response, 'SELECT *\nFROM test_db.test_table\nLIMIT 100\n')

    # With some column name
    response = self.api._get_select_query('test_db', 'test_table', 'test_column')
    assert_equal(response, 'SELECT test_column\nFROM test_db.test_table\nLIMIT 100\n')


  def test_describe_database(self):
    notebook = Mock()
    snippet = Mock()
    self.api.create_session = Mock(
      return_value={
        'id': 'test_id'
      }
    )
    self.api._execute = Mock(
      return_value='test_value'
    )
    self.api._check_status_and_fetch_result = Mock(
      return_value={
        'data': [
          ['Namespace Name', 'employees'],
          ['Comment', 'For software companies'],
          ['Location', 'hdfs://test_url:8020/warehouse/tablespace/external/hive/employees.db'],
          ['Owner', 'demo'],
          ['Properties', '((Create-by,Kevin), (Create-date,09/01/2019))']],
        'images': [],
        'meta': [
          {'comment': '', 'name': 'info_name', 'type': 'string'},
          {'comment': '', 'name': 'info_value', 'type': 'string'}],
        'type': 'table'}
    )
    response = self.api.describe_database(notebook, snippet, 'employees')

    assert_equal(response, {
      'comment': 'For software companies',
      'db_name': 'employees',
      'location': 'hdfs://test_url:8020/warehouse/tablespace/external/hive/employees.db',
      'owner_name': 'demo',
      'parameters': '{Create-by=Kevin, Create-date=09/01/2019}',
      'status': 0})


  def test_describe_table(self):
    notebook = Mock()
    snippet = Mock()
    self.api.create_session = Mock(
      return_value={
        'id': 'test_id'
      }
    )
    self.api._execute = Mock(
      return_value='test_value'
    )
    self.api._check_status_and_fetch_result = Mock(
      return_value={
        'data': [
          ['nname', 'string', None],
          ['# Partition Information', '', ''],
          ['# col_name', 'data_type', 'comment'],
          ['state', 'string', 'null'],
          ['', '', ''],
          ['# Detailed Table Information', '', ''],
          ['Database', 'default', ''],
          ['Table', 'test_nonacid', ''],
          ['Owner', 'demo', ''],
          ['Created Time', 'Tue Jun 28 11:35:33 UTC 2022', ''],
          ['Last Access', 'UNKNOWN', ''],
          ['Created By', 'Spark 3.3.0.7.2.16.0-94', ''],
          ['Type', 'EXTERNAL', ''],
          ['Provider', 'hive', ''],
          ['Table Properties',
            '[TRANSLATED_TO_EXTERNAL=TRUE, bucketing_version=2, '
            'external.table.purge=TRUE, numFilesErasureCoded=0, '
            'transient_lastDdlTime=1656416152]',
            ''],
          ['Statistics', '6 bytes', ''],
          ['Location',
            'hdfs://test_url:8020/warehouse/tablespace/external/hive/test_nonacid',
            ''],
          ['Serde Library',
            'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe',
            ''],
          ['InputFormat', 'org.apache.hadoop.mapred.TextInputFormat', ''],
          ['OutputFormat',
            'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',
            ''],
          ['Storage Properties', '[serialization.format=1]', ''],
          ['Partition Provider', 'Catalog', '']],
        'images': [],
        'meta': [
          {'comment': '', 'name': 'col_name', 'type': 'string'},
          {'comment': '', 'name': 'data_type', 'type': 'string'},
          {'comment': '', 'name': 'comment', 'type': 'string'}],
        'type': 'table'
      }
    )
    response = self.api.describe_table(notebook, snippet, 'default', 'test_nonacid')

    assert_equal(response, {
      'cols': [{'comment': 'None', 'name': 'nname', 'type': 'string'}],
      'comment': '',
      'details': {'properties': {
        'create_time': 'Tue Jun 28 11:35:33 UTC 2022',
        'format': 'text',
        'owner': 'demo',
        'table_type': 'EXTERNAL'},
      'stats': [
        {
          'col_name': 'TRUE',
          'comment': '',
          'data_type': 'TRANSLATED_TO_EXTERNAL'},
        {
          'col_name': '2',
          'comment': '',
          'data_type': 'bucketing_version'},
        {
          'col_name': 'TRUE',
          'comment': '',
          'data_type': 'external.table.purge'},
        {
          'col_name': '0',
          'comment': '',
          'data_type': 'numFilesErasureCoded'},
        {
          'col_name': '1656416152',
          'comment': '',
          'data_type': 'transient_lastDdlTime'}]},
      'hdfs_link': '/filebrowser/view=/warehouse/tablespace/external/hive/test_nonacid',
      'is_view': False,
      'name': 'test_nonacid',
      'partition_keys': [{'name': 'state', 'type': 'string'}],
      'path_location': 'hdfs://test_url:8020/warehouse/tablespace/external/hive/test_nonacid',
      'primary_keys': [],
      'properties': [{'col_name': '# Partition Information',
                      'comment': '',
                      'data_type': ''},
                      {'col_name': '# col_name',
                      'comment': 'comment',
                      'data_type': 'data_type'},
                      {'col_name': 'state', 'comment': 'null', 'data_type': 'string'},
                      {'col_name': '', 'comment': '', 'data_type': ''},
                      {'col_name': '# Detailed Table Information',
                      'comment': '',
                      'data_type': ''},
                      {'col_name': 'Database', 'comment': '', 'data_type': 'default'},
                      {'col_name': 'Table',
                      'comment': '',
                      'data_type': 'test_nonacid'},
                      {'col_name': 'Owner', 'comment': '', 'data_type': 'demo'},
                      {'col_name': 'Created Time',
                      'comment': '',
                      'data_type': 'Tue Jun 28 11:35:33 UTC 2022'},
                      {'col_name': 'Last Access',
                      'comment': '',
                      'data_type': 'UNKNOWN'},
                      {'col_name': 'Created By',
                      'comment': '',
                      'data_type': 'Spark 3.3.0.7.2.16.0-94'},
                      {'col_name': 'Type', 'comment': '', 'data_type': 'EXTERNAL'},
                      {'col_name': 'Provider', 'comment': '', 'data_type': 'hive'},
                      {'col_name': 'Table Properties',
                      'comment': '',
                      'data_type': '[TRANSLATED_TO_EXTERNAL=TRUE, '
                                    'bucketing_version=2, external.table.purge=TRUE, '
                                    'numFilesErasureCoded=0, '
                                    'transient_lastDdlTime=1656416152]'},
                      {'col_name': 'Statistics',
                      'comment': '',
                      'data_type': '6 bytes'},
                      {'col_name': 'Location',
                      'comment': '',
                      'data_type': 'hdfs://test_url:8020/warehouse/tablespace/external/hive/test_nonacid'},
                      {'col_name': 'Serde Library',
                      'comment': '',
                      'data_type': 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'},
                      {'col_name': 'InputFormat',
                      'comment': '',
                      'data_type': 'org.apache.hadoop.mapred.TextInputFormat'},
                      {'col_name': 'OutputFormat',
                      'comment': '',
                      'data_type': 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'},
                      {'col_name': 'Storage Properties',
                      'comment': '',
                      'data_type': '[serialization.format=1]'},
                      {'col_name': 'Partition Provider',
                      'comment': '',
                      'data_type': 'Catalog'}],
      'stats': [{'col_name': 'TRUE',
                  'comment': '',
                  'data_type': 'TRANSLATED_TO_EXTERNAL'},
                {'col_name': '2', 'comment': '', 'data_type': 'bucketing_version'},
                {'col_name': 'TRUE',
                  'comment': '',
                  'data_type': 'external.table.purge'},
                {'col_name': '0',
                  'comment': '',
                  'data_type': 'numFilesErasureCoded'},
                {'col_name': '1656416152',
                  'comment': '',
                  'data_type': 'transient_lastDdlTime'}],
      'status': 0})


  def test_get_jobs(self):
    local_jobs = [
      {'url': u'http://172.21.1.246:4040/jobs/job/?id=0', 'name': u'0'}
    ]
    jobs = self.api._get_standalone_jobs(LIVY_STANDALONE_LOG)
    assert_equal(jobs, local_jobs, jobs)

    yarn_jobs = [
      {'url': u'http://huetest-1.test.com:8088/proxy/application_1444070328046_0002/', 'name': u'application_1444070328046_0002'}
    ]
    jobs = self.api._get_yarn_jobs(LIVY_YARN_LOG)
    assert_equal(jobs, yarn_jobs, jobs)
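The strings asserted in test_get_select_query above pin down the query-building behaviour fairly completely. A minimal sketch that reproduces those expected outputs (the function name and the default limit of 100 are inferred from the assertions; this is illustrative, not the SparkApi._get_select_query source):

def build_select_query(database, table, column=None, operation=None, limit=100):
  # Illustrative sketch matching the outputs asserted in test_get_select_query.
  if operation == 'hello':
    return "SELECT 'Hello World!'"
  projection = column if column else '*'
  return 'SELECT %s\nFROM %s.%s\nLIMIT %d\n' % (projection, database, table, limit)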
Example #22
0
def get_api(request, snippet):
    from notebook.connectors.oozie_batch import OozieApi

    if snippet.get('wasBatchExecuted') and not TASK_SERVER.ENABLED.get():
        return OozieApi(user=request.user, request=request)

    if snippet.get('type') == 'report':
        snippet['type'] = 'impala'

    patch_snippet_for_connector(snippet)

    connector_name = snippet['type']

    if has_connectors() and snippet.get('type') == 'hello' and is_admin(request.user):
        interpreter = snippet.get('interpreter')
    else:
        interpreter = get_interpreter(connector_type=connector_name, user=request.user)

    interface = interpreter['interface']

    if get_cluster_config(request.user)['has_computes']:
        # Via Catalog autocomplete API or Notebook create sessions.
        compute = json.loads(request.POST.get('cluster', '""'))
        if compute == '""' or compute == 'undefined':
            compute = None
        if not compute and snippet.get('compute'):  # Via notebook.ko.js
            interpreter['compute'] = snippet['compute']

    LOG.debug('Selected interpreter %s interface=%s compute=%s' % (
        interpreter['type'], interface, interpreter.get('compute') and interpreter['compute']['name']))

    if interface == 'hiveserver2' or interface == 'hms':
        from notebook.connectors.hiveserver2 import HS2Api
        return HS2Api(user=request.user, request=request, interpreter=interpreter)
    elif interface == 'oozie':
        return OozieApi(user=request.user, request=request)
    elif interface == 'livy':
        from notebook.connectors.spark_shell import SparkApi
        return SparkApi(request.user, interpreter=interpreter)
    elif interface == 'livy-batch':
        from notebook.connectors.spark_batch import SparkBatchApi
        return SparkBatchApi(request.user, interpreter=interpreter)
    elif interface == 'text' or interface == 'markdown':
        from notebook.connectors.text import TextApi
        return TextApi(request.user)
    elif interface == 'rdbms':
        from notebook.connectors.rdbms import RdbmsApi
        return RdbmsApi(request.user, interpreter=snippet['type'], query_server=snippet.get('query_server'))
    elif interface == 'jdbc':
        if interpreter['options'] and interpreter['options'].get('url', '').find('teradata') >= 0:
            from notebook.connectors.jdbc_teradata import JdbcApiTeradata
            return JdbcApiTeradata(request.user, interpreter=interpreter)
        if interpreter['options'] and interpreter['options'].get('url', '').find('awsathena') >= 0:
            from notebook.connectors.jdbc_athena import JdbcApiAthena
            return JdbcApiAthena(request.user, interpreter=interpreter)
        elif interpreter['options'] and interpreter['options'].get('url', '').find('presto') >= 0:
            from notebook.connectors.jdbc_presto import JdbcApiPresto
            return JdbcApiPresto(request.user, interpreter=interpreter)
        elif interpreter['options'] and interpreter['options'].get('url', '').find('clickhouse') >= 0:
            from notebook.connectors.jdbc_clickhouse import JdbcApiClickhouse
            return JdbcApiClickhouse(request.user, interpreter=interpreter)
        elif interpreter['options'] and interpreter['options'].get('url', '').find('vertica') >= 0:
            from notebook.connectors.jdbc_vertica import JdbcApiVertica
            return JdbcApiVertica(request.user, interpreter=interpreter)
        else:
            from notebook.connectors.jdbc import JdbcApi
            return JdbcApi(request.user, interpreter=interpreter)
    elif interface == 'teradata':
        from notebook.connectors.jdbc_teradata import JdbcApiTeradata
        return JdbcApiTeradata(request.user, interpreter=interpreter)
    elif interface == 'athena':
        from notebook.connectors.jdbc_athena import JdbcApiAthena
        return JdbcApiAthena(request.user, interpreter=interpreter)
    elif interface == 'presto':
        from notebook.connectors.jdbc_presto import JdbcApiPresto
        return JdbcApiPresto(request.user, interpreter=interpreter)
    elif interface == 'sqlalchemy':
        from notebook.connectors.sql_alchemy import SqlAlchemyApi
        return SqlAlchemyApi(request.user, interpreter=interpreter)
    elif interface == 'solr':
        from notebook.connectors.solr import SolrApi
        return SolrApi(request.user, interpreter=interpreter)
    elif interface == 'hbase':
        from notebook.connectors.hbase import HBaseApi
        return HBaseApi(request.user)
    elif interface == 'ksql':
        from notebook.connectors.ksql import KSqlApi
        return KSqlApi(request.user, interpreter=interpreter)
    elif interface == 'flink':
        from notebook.connectors.flink_sql import FlinkSqlApi
        return FlinkSqlApi(request.user, interpreter=interpreter)
    elif interface == 'kafka':
        from notebook.connectors.kafka import KafkaApi
        return KafkaApi(request.user)
    elif interface == 'pig':
        # Backward compatibility until Hue 4
        return OozieApi(user=request.user, request=request)
    else:
        raise PopupException(_('Notebook connector interface not recognized: %s') % interface)
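For context, a hedged sketch of how a caller might use this dispatcher; the snippet contents below are assumptions for illustration, not a verbatim excerpt from Hue:

def run_snippet(request, notebook):
    # Hypothetical caller: a pyspark snippet resolves to SparkApi when the
    # configured interpreter interface is 'livy' (see the branch above).
    snippet = {'type': 'pyspark', 'statement': 'df.count()'}
    api = get_api(request, snippet)
    return api.execute(notebook, snippet)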
Example #23
0
 def setUp(self):
     self.user = '******'
     self.api = SparkApi(self.user)