Example #1
def _submit_coordinator(request, coordinator, mapping):
  try:
    wf = coordinator.workflow
    if IS_MULTICLUSTER_ONLY.get() and has_multi_cluster():
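      # In multi-cluster mode, attach a hard-coded 'auto-cluster' spec (an Altus Data Engineering
      # cluster definition together with its Spark jobs) to the submission mapping.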
      mapping['auto-cluster'] = {
        u'additionalClusterResourceTags': [],
        u'automaticTerminationCondition': u'EMPTY_JOB_QUEUE',  # or u'NONE'
        u'cdhVersion': u'CDH514',
        u'clouderaManagerPassword': u'guest',
        u'clouderaManagerUsername': u'guest',
        u'clusterName': u'analytics4', # Add time variable
        u'computeWorkersConfiguration': {
          u'bidUSDPerHr': 0,
          u'groupSize': 0,
          u'useSpot': False
        },
        u'environmentName': u'crn:altus:environments:us-west-1:12a0079b-1591-4ca0-b721-a446bda74e67:environment:analytics/236ebdda-18bd-428a-9d2b-cd6973d42946',
        u'instanceBootstrapScript': u'',
        u'instanceType': u'm4.xlarge',
        u'jobSubmissionGroupName': u'',
        u'jobs': [{
            u'failureAction': u'INTERRUPT_JOB_QUEUE',
            u'name': u'a87e20d7-5c0d-49ee-ab37-625fa2803d51',
            u'sparkJob': {
              u'applicationArguments': ['5'],
              u'jars': [u's3a://datawarehouse-customer360/ETL/spark-examples.jar'],
              u'mainClass': u'org.apache.spark.examples.SparkPi'
            }
          },
  #         {
  #           u'failureAction': u'INTERRUPT_JOB_QUEUE',
  #           u'name': u'a87e20d7-5c0d-49ee-ab37-625fa2803d51',
  #           u'sparkJob': {
  #             u'applicationArguments': ['10'],
  #             u'jars': [u's3a://datawarehouse-customer360/ETL/spark-examples.jar'],
  #             u'mainClass': u'org.apache.spark.examples.SparkPi'
  #           }
  #         },
  #         {
  #           u'failureAction': u'INTERRUPT_JOB_QUEUE',
  #           u'name': u'a87e20d7-5c0d-49ee-ab37-625fa2803d51',
  #           u'sparkJob': {
  #             u'applicationArguments': [u'filesystems3.conf'],
  #             u'jars': [u's3a://datawarehouse-customer360/ETL/envelope-0.6.0-SNAPSHOT-c6.jar'],
  #             u'mainClass': u'com.cloudera.labs.envelope.EnvelopeMain',
  #             u'sparkArguments': u'--archives=s3a://datawarehouse-customer360/ETL/filesystems3.conf'
  #           }
  #         }
        ],
        u'namespaceName': u'crn:altus:sdx:us-west-1:12a0079b-1591-4ca0-b721-a446bda74e67:namespace:analytics/7ea35fe5-dbc9-4b17-92b1-97a1ab32e410',
        u'publicKey': DEFAULT_PUBLIC_KEY.get(),
        u'serviceType': u'SPARK',
        u'workersConfiguration': {},
        u'workersGroupSize': u'3'
      }
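    # Deploy the workflow definition to HDFS first; the coordinator submission then
    # references it via 'wf_application_path'.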
    wf_dir = Submission(request.user, wf, request.fs, request.jt, mapping, local_tz=coordinator.data['properties']['timezone']).deploy()

    properties = {'wf_application_path': request.fs.get_hdfs_path(wf_dir)}
    properties.update(mapping)

    submission = Submission(request.user, coordinator, request.fs, request.jt, properties=properties)
    job_id = submission.run()

    return job_id
  except RestException as ex:
    LOG.exception('Error submitting coordinator')
    raise PopupException(_("Error submitting coordinator %s") % (coordinator,), detail=ex._headers.get('oozie-error-message', ex), error_code=200)
Example #2
def get_api(request, snippet):
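    # Resolve the connector API for a notebook snippet: batch executions always go through
    # Oozie; otherwise the interpreter configured for the snippet type decides the interface.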
    from notebook.connectors.oozie_batch import OozieApi

    if snippet.get('wasBatchExecuted'):
        return OozieApi(user=request.user, request=request)

    if snippet['type'] == 'report':
        snippet['type'] = 'impala'

    interpreter = [
        interpreter for interpreter in get_ordered_interpreters(request.user)
        if snippet['type'] in (interpreter['type'], interpreter['interface'])
    ]
    if not interpreter:
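        # No configured interpreter matched the snippet type: fall back to built-in
        # definitions for a few known types, otherwise fail.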
        if snippet['type'] == 'hbase':
            interpreter = [{
                'name': 'hbase',
                'type': 'hbase',
                'interface': 'hbase',
                'options': {},
                'is_sql': False
            }]
        elif snippet['type'] == 'kafka':
            interpreter = [{
                'name': 'kafka',
                'type': 'kafka',
                'interface': 'kafka',
                'options': {},
                'is_sql': False
            }]
        elif snippet['type'] == 'solr':
            interpreter = [{
                'name': 'solr',
                'type': 'solr',
                'interface': 'solr',
                'options': {},
                'is_sql': False
            }]
        elif snippet['type'] == 'custom':
            interpreter = [{
                'name': snippet['name'],
                'type': snippet['type'],
                'interface': snippet['interface'],
                'options': snippet.get('options', {}),
                'is_sql': False
            }]
        else:
            raise PopupException(
                _('Snippet type %(type)s is not configured.') % snippet)

    interpreter = interpreter[0]
    interface = interpreter['interface']

    if CONNECTORS.IS_ENABLED.get():
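        # With connectors enabled, build the cluster descriptor from the interpreter and
        # keep only the base snippet type (the part before the first '-').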
        cluster = {
            'connector': snippet['type'],
            'id': interpreter['type'],
        }
        snippet['type'] = snippet['type'].split('-', 2)[0]
        cluster.update(interpreter['options'])
    # Multi cluster
    elif has_multi_cluster():
        cluster = json.loads(request.POST.get(
            'cluster',
            '""'))  # Via Catalog autocomplete API or Notebook create sessions
        if cluster == '""' or cluster == 'undefined':
            cluster = None
        if not cluster and snippet.get('compute'):  # Via notebook.ko.js
            cluster = snippet['compute']
    else:
        cluster = None

    cluster_name = cluster.get('id') if cluster else None
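    # Altus clusters are routed by the CRN in their id: 'dataware:k8s' clusters go through
    # HiveServer2, other 'dataware' clusters through 'altus-adb', 'dataeng' clusters through 'dataeng'.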

    if cluster and 'altus:dataware:k8s' in cluster_name:
        interface = 'hiveserver2'
    elif cluster and 'crn:altus:dataware:' in cluster_name:
        interface = 'altus-adb'
    elif cluster and 'crn:altus:dataeng:' in cluster_name:
        interface = 'dataeng'

    LOG.info('Selected cluster %s %s interface %s' %
             (cluster_name, cluster, interface))
    snippet['interface'] = interface

    if interface.startswith('hiveserver2') or interface == 'hms':
        from notebook.connectors.hiveserver2 import HS2Api
        return HS2Api(user=request.user,
                      request=request,
                      cluster=cluster,
                      interface=interface)
    elif interface == 'oozie':
        return OozieApi(user=request.user, request=request)
    elif interface == 'livy':
        from notebook.connectors.spark_shell import SparkApi
        return SparkApi(request.user)
    elif interface == 'livy-batch':
        from notebook.connectors.spark_batch import SparkBatchApi
        return SparkBatchApi(request.user)
    elif interface == 'text' or interface == 'markdown':
        from notebook.connectors.text import TextApi
        return TextApi(request.user)
    elif interface == 'rdbms':
        from notebook.connectors.rdbms import RdbmsApi
        return RdbmsApi(request.user,
                        interpreter=snippet['type'],
                        query_server=snippet.get('query_server'))
    elif interface == 'altus-adb':
        from notebook.connectors.altus_adb import AltusAdbApi
        return AltusAdbApi(user=request.user,
                           cluster_name=cluster_name,
                           request=request)
    elif interface == 'dataeng':
        from notebook.connectors.dataeng import DataEngApi
        return DataEngApi(user=request.user,
                          request=request,
                          cluster_name=cluster_name)
    elif interface == 'jdbc':
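        # Generic JDBC interface: choose a dialect-specific API based on the configured connection URL.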
        if interpreter['options'] and interpreter['options'].get(
                'url', '').find('teradata') >= 0:
            from notebook.connectors.jdbc_teradata import JdbcApiTeradata
            return JdbcApiTeradata(request.user, interpreter=interpreter)
        if interpreter['options'] and interpreter['options'].get(
                'url', '').find('awsathena') >= 0:
            from notebook.connectors.jdbc_athena import JdbcApiAthena
            return JdbcApiAthena(request.user, interpreter=interpreter)
        elif interpreter['options'] and interpreter['options'].get(
                'url', '').find('presto') >= 0:
            from notebook.connectors.jdbc_presto import JdbcApiPresto
            return JdbcApiPresto(request.user, interpreter=interpreter)
        elif interpreter['options'] and interpreter['options'].get(
                'url', '').find('clickhouse') >= 0:
            from notebook.connectors.jdbc_clickhouse import JdbcApiClickhouse
            return JdbcApiClickhouse(request.user, interpreter=interpreter)
        else:
            from notebook.connectors.jdbc import JdbcApi
            return JdbcApi(request.user, interpreter=interpreter)
    elif interface == 'teradata':
        from notebook.connectors.jdbc import JdbcApiTeradata
        return JdbcApiTeradata(request.user, interpreter=interpreter)
    elif interface == 'athena':
        from notebook.connectors.jdbc import JdbcApiAthena
        return JdbcApiAthena(request.user, interpreter=interpreter)
    elif interface == 'presto':
        from notebook.connectors.jdbc_presto import JdbcApiPresto
        return JdbcApiPresto(request.user, interpreter=interpreter)
    elif interface == 'sqlalchemy':
        from notebook.connectors.sqlalchemyapi import SqlAlchemyApi
        return SqlAlchemyApi(request.user, interpreter=interpreter)
    elif interface == 'solr':
        from notebook.connectors.solr import SolrApi
        return SolrApi(request.user, interpreter=interpreter)
    elif interface == 'hbase':
        from notebook.connectors.hbase import HBaseApi
        return HBaseApi(request.user)
    elif interface == 'kafka':
        from notebook.connectors.kafka import KafkaApi
        return KafkaApi(request.user)
    elif interface == 'pig':
        return OozieApi(user=request.user,
                        request=request)  # Backward compatibility until Hue 4
    else:
        raise PopupException(
            _('Notebook connector interface not recognized: %s') % interface)