Example #1
def get_service(process, cluster=None, instance=None):
    cm_cluster = None
    if cluster:
        cm_cluster = get_cloudera_cluster(cluster)
    elif instance:
        cm_cluster = get_cloudera_cluster(instance.node_group.cluster)
    else:
        raise ValueError(_("'cluster' or 'instance' argument is missing"))

    if process in ['NAMENODE', 'DATANODE', 'SECONDARYNAMENODE']:
        return cm_cluster.get_service(HDFS_SERVICE_NAME)
    elif process in ['RESOURCEMANAGER', 'NODEMANAGER', 'JOBHISTORY']:
        return cm_cluster.get_service(YARN_SERVICE_NAME)
    elif process in ['OOZIE_SERVER']:
        return cm_cluster.get_service(OOZIE_SERVICE_NAME)
    elif process in ['HIVESERVER2', 'HIVEMETASTORE', 'WEBHCAT']:
        return cm_cluster.get_service(HIVE_SERVICE_NAME)
    elif process in ['HUE_SERVER']:
        return cm_cluster.get_service(HUE_SERVICE_NAME)
    elif process in ['SPARK_YARN_HISTORY_SERVER']:
        return cm_cluster.get_service(SPARK_SERVICE_NAME)
    else:
        raise ValueError(
            _("Process %(process)s is not supported by CDH plugin") %
            {'process': process})
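The dispatch above can be read as a static table from process name to service name. Below is a minimal, hypothetical sketch of that table in plain Python; the service names are the defaults from ClouderaUtils in Example #38, and the lookup function is illustrative, not part of the Sahara API.

# Hypothetical illustration of the process-to-service dispatch as a lookup
# table. The names and the helper are stand-ins, not Sahara code.
_PROCESS_TO_SERVICE = {
    'NAMENODE': 'hdfs01', 'DATANODE': 'hdfs01', 'SECONDARYNAMENODE': 'hdfs01',
    'RESOURCEMANAGER': 'yarn01', 'NODEMANAGER': 'yarn01', 'JOBHISTORY': 'yarn01',
    'OOZIE_SERVER': 'oozie01',
    'HIVESERVER2': 'hive01', 'HIVEMETASTORE': 'hive01', 'WEBHCAT': 'hive01',
    'HUE_SERVER': 'hue01',
    'SPARK_YARN_HISTORY_SERVER': 'spark_on_yarn01',
}


def service_name_for(process):
    try:
        return _PROCESS_TO_SERVICE[process]
    except KeyError:
        raise ValueError("Process %s is not supported" % process)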
Example #2
    def __call__(self, req):
        """Ensures that tenants in url and token are equal.

        Handle the incoming request by checking the tenant info from the
        headers and the url ({tenant_id} url attribute).

        Pass request downstream on success.
        Reject the request if the tenant_id from the headers does not equal
        the tenant_id from the url.
        """
        token_tenant = req.environ.get("HTTP_X_TENANT_ID")
        if not token_tenant:
            LOG.warning(_LW("Can't get tenant_id from env"))
            raise ex.HTTPServiceUnavailable()

        path = req.environ["PATH_INFO"]
        if path != "/":
            version, url_tenant, rest = commons.split_path(path, 3, 3, True)
            if not version or not url_tenant or not rest:
                LOG.warning(_LW("Incorrect path: {path}").format(path=path))
                raise ex.HTTPNotFound(_("Incorrect path"))

            if token_tenant != url_tenant:
                LOG.debug("Unauthorized: token tenant != requested tenant")
                raise ex.HTTPUnauthorized(_("Token tenant != requested tenant"))
        return self.application
Example #3
    def _await_networks(self, cluster, instances):
        if not instances:
            return

        cpo.add_provisioning_step(cluster.id, _("Assign IPs"), len(instances))

        ips_assigned = set()
        self._ips_assign(ips_assigned, cluster, instances)

        LOG.info(
            _LI("Cluster {cluster_id}: all instances have IPs assigned")
            .format(cluster_id=cluster.id))

        cluster = conductor.cluster_get(context.ctx(), cluster)
        instances = g.get_instances(cluster, ips_assigned)

        cpo.add_provisioning_step(
            cluster.id, _("Wait for instance accessibility"), len(instances))

        with context.ThreadGroup() as tg:
            for instance in instances:
                tg.spawn("wait-for-ssh-%s" % instance.instance_name,
                         self._wait_until_accessible, instance)

        LOG.info(_LI("Cluster {cluster_id}: all instances are accessible")
                 .format(cluster_id=cluster.id))
Example #4
def domain_for_proxy():
    '''Return the proxy domain or None

    If configured to use the proxy domain, this function will return that
    domain. If not configured to use the proxy domain, this function will
    return None. If the proxy domain can't be found this will raise an
    exception.

    :returns: A Keystone Domain object or None.
    :raises ConfigurationError: If the domain is requested but not specified.
    :raises NotFoundException: If the domain name is specified but cannot be
                               found.

    '''
    if CONF.use_domain_for_proxy_users is False:
        return None
    if CONF.proxy_user_domain_name is None:
        raise ex.ConfigurationError(_('Proxy domain requested but not '
                                      'specified.'))
    admin = k.client_for_admin()

    global PROXY_DOMAIN
    if not PROXY_DOMAIN:
        domain_list = admin.domains.list(name=CONF.proxy_user_domain_name)
        if len(domain_list) == 0:
            raise ex.NotFoundException(value=CONF.proxy_user_domain_name,
                                       message=_('Failed to find domain %s'))
        # the domain name should be globally unique in Keystone
        if len(domain_list) > 1:
            raise ex.NotFoundException(value=CONF.proxy_user_domain_name,
                                       message=_('Unexpected results found '
                                                 'when searching for domain '
                                                 '%s'))
        PROXY_DOMAIN = domain_list[0]
    return PROXY_DOMAIN
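The PROXY_DOMAIN handling above is a module-level cache: the Keystone lookup runs once and the result is reused on later calls. A minimal standalone sketch of that pattern, with lookup_domain as a hypothetical stand-in for the Keystone call:

_CACHED_DOMAIN = None


def lookup_domain(name):
    # hypothetical stand-in for admin.domains.list(name=...)
    return {'name': name}


def domain_for_proxy_cached(name):
    global _CACHED_DOMAIN
    if _CACHED_DOMAIN is None:
        _CACHED_DOMAIN = lookup_domain(name)
    return _CACHED_DOMAIN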
Example #5
    def wait_ambari_requests(self, requests, cluster_name):
        requests = set(requests)
        failed = []
        while len(requests) > 0:
            completed, not_completed = set(), set()
            for req_id in requests:
                request = self.get_request_info(cluster_name, req_id)
                status = request.get("request_status")
                if status == 'COMPLETED':
                    completed.add(req_id)
                elif status in ['IN_PROGRESS', 'PENDING']:
                    not_completed.add(req_id)
                else:
                    failed.append(request)
            if failed:
                msg = _("Some Ambari request(s) "
                        "not in COMPLETED state: %(description)s.")
                descrs = []
                for req in failed:
                    descr = _(
                        "request %(id)d: %(name)s - in status %(status)s")
                    descrs.append(descr %
                                  {'id': req.get("id"),
                                   'name': req.get("request_context"),
                                   'status': req.get("request_status")})
                raise p_exc.HadoopProvisionError(msg % {'description': descrs})
            requests = not_completed
            context.sleep(5)
            LOG.debug("Waiting for %d ambari request(s) to be completed",
                      len(not_completed))
        LOG.debug("All ambari requests have been completed")
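The method above is a polling loop that repeatedly partitions the outstanding Ambari requests into completed, pending and failed sets. A simplified, self-contained sketch of the same loop, where get_status is a hypothetical callback returning the request state:

import time


def wait_for_requests(request_ids, get_status, interval=5):
    pending = set(request_ids)
    while pending:
        still_pending, failed = set(), []
        for req_id in pending:
            status = get_status(req_id)
            if status == 'COMPLETED':
                continue
            elif status in ('IN_PROGRESS', 'PENDING'):
                still_pending.add(req_id)
            else:
                failed.append(req_id)
        if failed:
            raise RuntimeError("Requests not in COMPLETED state: %s" % failed)
        pending = still_pending
        if pending:
            time.sleep(interval)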
Example #6
File: api.py Project: lhcxx/sahara
def request_data():
    if hasattr(flask.request, 'parsed_data'):
        return flask.request.parsed_data

    if not flask.request.content_length > 0:
        LOG.debug("Empty body provided in request")
        return dict()

    if flask.request.file_upload:
        return flask.request.data

    deserializer = None
    content_type = flask.request.mimetype
    if not content_type or content_type in RT_JSON:
        deserializer = wsgi.JSONDeserializer()
    elif content_type in RT_XML:
        abort_and_log(400, _("XML requests are not supported yet"))
        # deserializer = XMLDeserializer()
    else:
        abort_and_log(400,
                      _("Content type '%s' isn't supported") % content_type)

    # cache parsed request data to avoid unwanted re-parsing
    parsed_data = deserializer.deserialize(flask.request.data)['body']
    flask.request.parsed_data = parsed_data

    return flask.request.parsed_data
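request_data() caches the parsed body on the request object and dispatches on content type. A framework-free sketch of that flow, assuming a request-like object with content_length, mimetype, data and a writable parsed_data attribute (a stand-in for flask.request):

import json

RT_JSON = 'application/json'


def request_data(request):
    # 'request' is a hypothetical stand-in for flask.request
    if getattr(request, 'parsed_data', None) is not None:
        return request.parsed_data
    if not request.content_length:
        return {}
    if request.mimetype and request.mimetype != RT_JSON:
        raise ValueError("Content type '%s' isn't supported" % request.mimetype)
    request.parsed_data = json.loads(request.data)
    return request.parsed_data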
Example #7
def validate_number_of_datanodes(cluster, scaled_groups, default_configs):
    dfs_replication = 0
    for config in default_configs:
        if config.name == "dfs.replication":
            dfs_replication = config.default_value
    conf = cluster.cluster_configs
    if "HDFS" in conf and "dfs.replication" in conf["HDFS"]:
        dfs_replication = conf["HDFS"]["dfs.replication"]

    if not scaled_groups:
        scaled_groups = {}
    dn_count = 0
    for ng in cluster.node_groups:
        if "DATANODE" in ng.node_processes:
            if ng.id in scaled_groups:
                dn_count += scaled_groups[ng.id]
            else:
                dn_count += ng.count

    if dn_count < int(dfs_replication):
        raise ex.InvalidComponentCountException(
            "datanode",
            _("%s or more") % dfs_replication,
            dn_count,
            _("Number of %(dn)s instances should not be less " "than %(replication)s")
            % {"dn": "DATANODE", "replication": "dfs.replication"},
        )
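The check above only compares the (possibly scaled) DataNode count against dfs.replication. A standalone sketch with plain dicts in place of the cluster and node-group objects; the sample input is made up:

def enough_datanodes(node_groups, scaled_groups, dfs_replication):
    # node_groups: list of dicts with 'id', 'count', 'node_processes'
    scaled_groups = scaled_groups or {}
    dn_count = 0
    for ng in node_groups:
        if 'DATANODE' in ng['node_processes']:
            dn_count += scaled_groups.get(ng['id'], ng['count'])
    return dn_count >= int(dfs_replication)


# example: 2 datanodes against dfs.replication=3 -> False
print(enough_datanodes(
    [{'id': 'ng1', 'count': 2, 'node_processes': ['DATANODE']}], None, 3))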
Example #8
    def _exec_ambari_command(self, ambari_info, body, cmd_uri):

        LOG.debug('PUT URI: {0}'.format(cmd_uri))
        result = self._put(cmd_uri, ambari_info, data=body)
        if result.status_code == 202:
            LOG.debug(
                'PUT response: {0}'.format(result.text))
            json_result = json.loads(result.text)
            href = json_result['href'] + '/tasks?fields=Tasks/status'
            success = self._wait_for_async_request(href, ambari_info)
            if success:
                LOG.info(
                    _LI("Successfully changed state of Hadoop components "))
            else:
                LOG.critical(_LC('Failed to change state of Hadoop '
                                 'components'))
                raise ex.HadoopProvisionError(
                    _('Failed to change state of Hadoop components'))

        else:
            LOG.error(
                _LE('Command failed. Status: %(status)s, response: '
                    '%(response)s'),
                {'status': result.status_code, 'response': result.text})
            raise ex.HadoopProvisionError(_('Hadoop/Ambari command failed.'))
Example #9
def _check_storm(cluster):
    dr_count = utils.get_instances_count(cluster, common.DRPC_SERVER)
    ni_count = utils.get_instances_count(cluster, common.NIMBUS)
    su_count = utils.get_instances_count(cluster, common.STORM_UI_SERVER)
    sv_count = utils.get_instances_count(cluster, common.SUPERVISOR)
    if dr_count > 1:
        raise ex.InvalidComponentCountException(common.DRPC_SERVER,
                                                _("0 or 1"), dr_count)
    if ni_count > 1:
        raise ex.InvalidComponentCountException(common.NIMBUS,
                                                _("0 or 1"), ni_count)
    if su_count > 1:
        raise ex.InvalidComponentCountException(common.STORM_UI_SERVER,
                                                _("0 or 1"), su_count)
    if dr_count == 0 and ni_count == 1:
        raise ex.RequiredServiceMissingException(
            common.DRPC_SERVER, required_by=common.NIMBUS)
    if dr_count == 1 and ni_count == 0:
        raise ex.RequiredServiceMissingException(
            common.NIMBUS, required_by=common.DRPC_SERVER)
    if su_count == 1 and (dr_count == 0 or ni_count == 0):
        raise ex.RequiredServiceMissingException(
            common.NIMBUS, required_by=common.STORM_UI_SERVER)
    if dr_count == 1 and sv_count == 0:
        raise ex.RequiredServiceMissingException(
            common.SUPERVISOR, required_by=common.DRPC_SERVER)
    if sv_count > 0 and dr_count == 0:
        raise ex.RequiredServiceMissingException(
            common.DRPC_SERVER, required_by=common.SUPERVISOR)
Example #10
    def _add_hosts_and_components(
            self, cluster_spec, servers, ambari_info, name):

        add_host_url = 'http://{0}/api/v1/clusters/{1}/hosts/{2}'
        add_host_component_url = ('http://{0}/api/v1/clusters/{1}'
                                  '/hosts/{2}/host_components/{3}')
        for host in servers:
            hostname = host.instance.fqdn().lower()
            result = self._post(
                add_host_url.format(ambari_info.get_address(), name, hostname),
                ambari_info)
            if result.status_code != 201:
                LOG.error(
                    _LE('Create host command failed. {0}').format(result.text))
                raise ex.HadoopProvisionError(
                    _('Failed to add host: %s') % result.text)

            node_group_name = host.node_group.name
            # TODO(jspeidel): ensure that node group exists
            node_group = cluster_spec.node_groups[node_group_name]
            for component in node_group.components:
                # don't add any AMBARI components
                if component.find('AMBARI') != 0:
                    result = self._post(add_host_component_url.format(
                        ambari_info.get_address(), name, hostname, component),
                        ambari_info)
                    if result.status_code != 201:
                        LOG.error(
                            _LE('Create host_component command failed. %s'),
                            result.text)
                        raise ex.HadoopProvisionError(
                            _('Failed to add host component: %s')
                            % result.text)
Example #11
    def _install_services(self, cluster_name, ambari_info):
        LOG.info(_LI('Installing required Hadoop services ...'))

        ambari_address = ambari_info.get_address()
        install_url = ('http://{0}/api/v1/clusters/{1}'
                       '/services?ServiceInfo/state=INIT'.format(
                           ambari_address, cluster_name))
        body = ('{"RequestInfo" : { "context" : "Install all services" },'
                '"Body" : {"ServiceInfo": {"state" : "INSTALLED"}}}')

        result = self._put(install_url, ambari_info, data=body)

        if result.status_code == 202:
            json_result = json.loads(result.text)
            request_id = json_result['Requests']['id']
            success = self._wait_for_async_request(self._get_async_request_uri(
                ambari_info, cluster_name, request_id),
                ambari_info)
            if success:
                LOG.info(_LI("Install of Hadoop stack successful."))
                self._finalize_ambari_state(ambari_info)
            else:
                LOG.critical(_LC('Install command failed.'))
                raise ex.HadoopProvisionError(
                    _('Installation of Hadoop stack failed.'))
        elif result.status_code != 200:
            LOG.error(
                _LE('Install command failed. {0}').format(result.text))
            raise ex.HadoopProvisionError(
                _('Installation of Hadoop stack failed.'))
Example #12
    def _validate_existing_ng_scaling(self, cluster, existing):
        scalable_processes = self._get_scalable_processes()
        dn_to_delete = 0
        for ng in cluster.node_groups:
            if ng.id in existing:
                if ng.count > existing[ng.id] and "datanode" in ng.node_processes:
                    dn_to_delete += ng.count - existing[ng.id]
                if not set(ng.node_processes).issubset(scalable_processes):
                    raise ex.NodeGroupCannotBeScaled(
                        ng.name,
                        _("Vanilla plugin cannot scale nodegroup" " with processes: %s") % " ".join(ng.node_processes),
                    )

        dn_amount = len(vu.get_datanodes(cluster))
        rep_factor = c_helper.get_config_value("HDFS", "dfs.replication", cluster)

        if dn_to_delete > 0 and dn_amount - dn_to_delete < rep_factor:
            raise ex.ClusterCannotBeScaled(
                cluster.name,
                _(
                    "Vanilla plugin cannot shrink cluster because "
                    "it would be not enough nodes for replicas "
                    "(replication factor is %s)"
                )
                % rep_factor,
            )
Example #13
    def validate(self, cluster):
        nn_count = sum([ng.count for ng in utils.get_node_groups(cluster, "namenode")])
        if nn_count != 1:
            raise ex.InvalidComponentCountException("namenode", 1, nn_count)

        snn_count = sum([ng.count for ng in utils.get_node_groups(cluster, "secondarynamenode")])
        if snn_count > 1:
            raise ex.InvalidComponentCountException("secondarynamenode", _("0 or 1"), snn_count)

        jt_count = sum([ng.count for ng in utils.get_node_groups(cluster, "jobtracker")])

        if jt_count > 1:
            raise ex.InvalidComponentCountException("jobtracker", _("0 or 1"), jt_count)

        oozie_count = sum([ng.count for ng in utils.get_node_groups(cluster, "oozie")])

        if oozie_count > 1:
            raise ex.InvalidComponentCountException("oozie", _("0 or 1"), oozie_count)

        hive_count = sum([ng.count for ng in utils.get_node_groups(cluster, "hiveserver")])
        if jt_count == 0:

            tt_count = sum([ng.count for ng in utils.get_node_groups(cluster, "tasktracker")])
            if tt_count > 0:
                raise ex.RequiredServiceMissingException("jobtracker", required_by="tasktracker")

            if oozie_count > 0:
                raise ex.RequiredServiceMissingException("jobtracker", required_by="oozie")

            if hive_count > 0:
                raise ex.RequiredServiceMissingException("jobtracker", required_by="hive")

        if hive_count > 1:
            raise ex.InvalidComponentCountException("hive", _("0 or 1"), hive_count)
Example #14
def check_mains_libs(data, **kwargs):
    mains = data.get("mains", [])
    libs = data.get("libs", [])
    job_type, subtype = edp.split_job_type(data.get("type"))
    streaming = (job_type == edp.JOB_TYPE_MAPREDUCE and
                 subtype == edp.JOB_SUBTYPE_STREAMING)

    # These types must have a value in mains and may also use libs
    if job_type in [edp.JOB_TYPE_PIG, edp.JOB_TYPE_HIVE,
                    edp.JOB_TYPE_SHELL, edp.JOB_TYPE_SPARK,
                    edp.JOB_TYPE_STORM]:
        if not mains:
            if job_type in [edp.JOB_TYPE_SPARK, edp.JOB_TYPE_STORM]:
                msg = _(
                    "%s job requires main application jar") % data.get("type")
            else:
                msg = _("%s flow requires main script") % data.get("type")
            raise e.InvalidDataException(msg)

        # Check for overlap
        if set(mains).intersection(set(libs)):
            raise e.InvalidDataException(_("'mains' and 'libs' overlap"))

    else:
        # Java and MapReduce require libs, but MapReduce.Streaming does not
        if not streaming and not libs:
            raise e.InvalidDataException(_("%s flow requires libs") %
                                         data.get("type"))
        if mains:
            raise e.InvalidDataException(_("%s flow does not use mains") %
                                         data.get("type"))

    # Make sure that all referenced binaries exist
    _check_binaries(mains)
    _check_binaries(libs)
Example #15
def render(res=None, resp_type=None, status=None, **kwargs):
    if not res:
        res = {}
    if type(res) is dict:
        res.update(kwargs)
    elif kwargs:
        # can't merge kwargs into the non-dict res
        abort_and_log(500,
                      _("Non-dict and non-empty kwargs passed to render"))

    status_code = getattr(flask.request, 'status_code', None)
    if status:
        status_code = status
    if not status_code:
        status_code = 200

    if not resp_type:
        resp_type = getattr(flask.request, 'resp_type', RT_JSON)

    if not resp_type:
        resp_type = RT_JSON

    serializer = None
    if "application/json" in resp_type:
        resp_type = RT_JSON
        serializer = wsgi.JSONDictSerializer()
    else:
        abort_and_log(400, _("Content type '%s' isn't supported") % resp_type)

    body = serializer.serialize(res)
    resp_type = str(resp_type)

    return flask.Response(response=body, status=status_code,
                          mimetype=resp_type)
Example #16
    def _validate_existing_ng_scaling(self, cluster, existing):
        scalable_processes = self._get_scalable_processes()
        dn_to_delete = 0
        for ng in cluster.node_groups:
            if ng.id in existing:
                if ng.count > existing[ng.id] and ("datanode" in
                                                   ng.node_processes):
                    dn_to_delete += ng.count - existing[ng.id]
                if not set(ng.node_processes).issubset(scalable_processes):
                    raise ex.NodeGroupCannotBeScaled(
                        ng.name, _("Spark plugin cannot scale nodegroup"
                                   " with processes: %s") %
                        ' '.join(ng.node_processes))

        dn_amount = len(utils.get_instances(cluster, "datanode"))
        rep_factor = utils.get_config_value_or_default('HDFS',
                                                       "dfs.replication",
                                                       cluster)

        if dn_to_delete > 0 and dn_amount - dn_to_delete < rep_factor:
            raise ex.ClusterCannotBeScaled(
                cluster.name, _("Spark plugin cannot shrink cluster because "
                                "there would be not enough nodes for HDFS "
                                "replicas (replication factor is %s)") %
                rep_factor)
Example #17
def generate_key_pair(key_length=2048):
    """Create RSA key pair with specified number of bits in key.

    Returns tuple of private and public keys.
    """
    with tempfiles.tempdir() as tmpdir:
        keyfile = os.path.join(tmpdir, 'tempkey')
        args = [
            'ssh-keygen',
            '-q',  # quiet
            '-N', '',  # w/o passphrase
            '-t', 'rsa',  # create key of rsa type
            '-f', keyfile,  # filename of the key file
            '-C', 'Generated-by-Sahara'  # key comment
        ]
        if key_length is not None:
            args.extend(['-b', key_length])
        processutils.execute(*args)
        if not os.path.exists(keyfile):
            raise ex.SystemError(_("Private key file hasn't been created"))
        private_key = open(keyfile).read()
        public_key_path = keyfile + '.pub'
        if not os.path.exists(public_key_path):
            raise ex.SystemError(_("Public key file hasn't been created"))
        public_key = open(public_key_path).read()

        return private_key, public_key
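A self-contained variant of the key-pair helper above using only the standard library; it assumes ssh-keygen is available on PATH and Python 3 for tempfile.TemporaryDirectory:

import os
import subprocess
import tempfile


def generate_key_pair(key_length=2048):
    # sketch: shell out to ssh-keygen and read back both key files
    with tempfile.TemporaryDirectory() as tmpdir:
        keyfile = os.path.join(tmpdir, 'tempkey')
        args = ['ssh-keygen', '-q', '-N', '', '-t', 'rsa',
                '-f', keyfile, '-C', 'Generated-by-Sahara',
                '-b', str(key_length)]
        subprocess.check_call(args)
        with open(keyfile) as priv, open(keyfile + '.pub') as pub:
            return priv.read(), pub.read()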
Example #18
    def __call__(self, req):
        """Ensures that the requested and token tenants match

        Handle incoming requests by checking tenant info from the
        headers and url ({tenant_id} url attribute), if using v1 or v1.1
        APIs. If using the v2 API, this function will check the token
        tenant and the requested tenant in the headers.

        Pass request downstream on success.
        Reject request if tenant_id from headers is not equal to the
        tenant_id from url or v2 project header.
        """
        path = req.environ['PATH_INFO']
        if path != '/':
            token_tenant = req.environ.get("HTTP_X_TENANT_ID")
            if not token_tenant:
                LOG.warning(_LW("Can't get tenant_id from env"))
                raise ex.HTTPServiceUnavailable()

            if path.startswith('/v2'):
                version, rest = commons.split_path(path, 2, 2, True)
                requested_tenant = req.headers.get('OpenStack-Project-ID')
            else:
                version, requested_tenant, rest = commons.split_path(
                    path, 3, 3, True)

            if not version or not requested_tenant or not rest:
                LOG.warning(_LW("Incorrect path: {path}").format(path=path))
                raise ex.HTTPNotFound(_("Incorrect path"))

            if token_tenant != requested_tenant:
                LOG.debug("Unauthorized: token tenant != requested tenant")
                raise ex.HTTPUnauthorized(
                    _('Token tenant != requested tenant'))
        return self.application
Example #19
    def _map_to_user_inputs(self, hadoop_version, configs):
        config_objs = self.get_all_configs(hadoop_version)

        # convert config objects to applicable_target -> config_name -> obj
        config_objs_map = {}
        for config_obj in config_objs:
            applicable_target = config_obj.applicable_target
            confs = config_objs_map.get(applicable_target, {})
            confs[config_obj.name] = config_obj
            config_objs_map[applicable_target] = confs

        # iterate over all configs and append UserInputs to result list
        result = []
        for applicable_target in configs:
            for config_name in configs[applicable_target]:
                confs = config_objs_map.get(applicable_target)
                if not confs:
                    raise ex.ConfigurationError(
                        _("Can't find applicable target "
                          "'%(applicable_target)s' for '%(config_name)s'")
                        % {"applicable_target": applicable_target,
                           "config_name": config_name})
                conf = confs.get(config_name)
                if not conf:
                    raise ex.ConfigurationError(
                        _("Can't find config '%(config_name)s' "
                          "in '%(applicable_target)s'")
                        % {"config_name": config_name,
                           "applicable_target": applicable_target})
                result.append(UserInput(
                    conf, configs[applicable_target][config_name]))

        return sorted(result)
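The method above performs a two-level lookup (applicable target, then config name) and fails loudly when either level is missing. A standalone sketch of the same idea with plain dicts; known_configs and user_configs are hypothetical inputs:

def map_to_user_inputs(known_configs, user_configs):
    # known_configs: {target: {config_name: default}}
    # user_configs:  {target: {config_name: value}}
    result = []
    for target, confs in user_configs.items():
        known = known_configs.get(target)
        if known is None:
            raise KeyError("Can't find applicable target '%s'" % target)
        for name, value in confs.items():
            if name not in known:
                raise KeyError("Can't find config '%s' in '%s'" % (name, target))
            result.append((target, name, value))
    return sorted(result)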
Example #20
def url_for(service_catalog, service_type, admin=False, endpoint_type=None):
    if not endpoint_type:
        endpoint_type = 'publicURL'
    if admin:
        endpoint_type = 'adminURL'

    service = _get_service_from_catalog(service_catalog, service_type)

    if service:
        endpoints = service['endpoints']
        if CONF.os_region_name:
            endpoints = [e for e in endpoints
                         if e['region'] == CONF.os_region_name]
        try:
            return _get_endpoint_url(endpoints, endpoint_type)
        except Exception:
            raise ex.SystemError(
                _("Endpoint with type %(type)s is not found for service "
                  "%(service)s")
                % {'type': endpoint_type,
                   'service': service_type})

    else:
        raise ex.SystemError(
            _('Service "%s" not found in service catalog') % service_type)
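url_for() filters the service catalog by type, region and endpoint interface. A minimal sketch against a Keystone-v2-style catalog built from plain dicts; the catalog data is made up:

def url_for(service_catalog, service_type, endpoint_type='publicURL',
            region=None):
    for service in service_catalog:
        if service.get('type') == service_type:
            endpoints = service['endpoints']
            if region:
                endpoints = [e for e in endpoints if e['region'] == region]
            if endpoints:
                return endpoints[0][endpoint_type]
            raise LookupError("No %s endpoint for %s"
                              % (endpoint_type, service_type))
    raise LookupError('Service "%s" not found in catalog' % service_type)


catalog = [{'type': 'compute',
            'endpoints': [{'region': 'RegionOne',
                           'publicURL': 'http://nova.example:8774'}]}]
print(url_for(catalog, 'compute', region='RegionOne'))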
Example #21
    def start_services(self, cluster_name, cluster_spec, ambari_info):
        start_url = ('http://{0}/api/v1/clusters/{1}/services?ServiceInfo/'
                     'state=INSTALLED'.format(
                         ambari_info.get_address(), cluster_name))
        body = ('{"RequestInfo" : { "context" : "Start all services" },'
                '"Body" : {"ServiceInfo": {"state" : "STARTED"}}}')

        self._fire_service_start_notifications(
            cluster_name, cluster_spec, ambari_info)
        result = self._put(start_url, ambari_info, data=body)
        if result.status_code == 202:
            json_result = json.loads(result.text)
            request_id = json_result['Requests']['id']
            success = self._wait_for_async_request(
                self._get_async_request_uri(ambari_info, cluster_name,
                                            request_id), ambari_info)
            if success:
                LOG.info(
                    _LI("Successfully started Hadoop cluster."))
                LOG.info(_LI('Ambari server address: {server_address}')
                         .format(server_address=ambari_info.get_address()))

            else:
                LOG.error(_LE('Failed to start Hadoop cluster.'))
                raise ex.HadoopProvisionError(
                    _('Start of Hadoop services failed.'))

        elif result.status_code != 200:
            LOG.error(
                _LE('Start command failed. Status: {status}, '
                    'response: {response}').format(status=result.status_code,
                                                   response=result.text))
            raise ex.HadoopProvisionError(
                _('Start of Hadoop services failed.'))
Example #22
def job_binary_internal_create(context, values):
    """Returns a JobBinaryInternal that does not contain a data field

    The data column uses deferred loading.
    """
    values["datasize"] = len(values["data"])
    datasize_KB = values["datasize"] / 1024.0
    if datasize_KB > CONF.job_binary_max_KB:
        raise ex.DataTooBigException(
            round(datasize_KB, 1), CONF.job_binary_max_KB,
            _("Size of internal binary (%(size)sKB) is greater "
              "than the maximum (%(maximum)sKB)"))

    job_binary_int = m.JobBinaryInternal()
    job_binary_int.update(values)

    session = get_session()
    try:
        with session.begin():
            session.add(job_binary_int)
    except db_exc.DBDuplicateEntry as e:
        raise ex.DBDuplicateEntry(
            _("Duplicate entry for JobBinaryInternal: %s") % e.columns)

    return job_binary_internal_get(context, job_binary_int.id)
Example #23
    def __call__(self, req):
        """Ensures that tenants in url and token are equal.

        Handle the incoming request by checking the tenant info from the
        headers and the url ({tenant_id} url attribute).

        Pass request downstream on success.
        Reject the request if the tenant_id from the headers does not equal
        the tenant_id from the url.
        """
        token_tenant = req.environ.get("HTTP_X_TENANT_ID")
        if not token_tenant:
            LOG.warning("Can't get tenant_id from env")
            raise ex.HTTPServiceUnavailable()

        path = req.environ['PATH_INFO']
        if path != '/':
            try:
                version, possibly_url_tenant, rest = (
                    strutils.split_path(path, 2, 3, True)
                )
            except ValueError:
                LOG.warning("Incorrect path: {path}".format(path=path))
                raise ex.HTTPNotFound(_("Incorrect path"))

            if uuidutils.is_uuid_like(possibly_url_tenant):
                url_tenant = possibly_url_tenant
                if token_tenant != url_tenant:
                    LOG.debug("Unauthorized: token tenant != requested tenant")
                    raise ex.HTTPUnauthorized(
                        _('Token tenant != requested tenant'))
        return self.application
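This variant of the middleware only compares tenants when the URL segment actually looks like a UUID. A standalone sketch of that check using the standard library; _split_path is a simplified stand-in for the oslo/Sahara helper:

import uuid


def _split_path(path, minsegs, maxsegs):
    segs = [s for s in path.split('/') if s]
    if not (minsegs <= len(segs) <= maxsegs):
        raise ValueError("Incorrect path: %s" % path)
    return segs + [''] * (maxsegs - len(segs))


def tenant_matches(path, token_tenant):
    segs = _split_path(path, 2, 3)
    maybe_tenant = segs[1]
    try:
        uuid.UUID(maybe_tenant)
    except ValueError:
        return True  # no tenant id in the url, nothing to compare
    return maybe_tenant == token_tenant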
Example #24
def cluster_create(context, values):
    values = values.copy()
    cluster = m.Cluster()
    node_groups = values.pop("node_groups", [])
    cluster.update(values)

    session = get_session()
    with session.begin():
        try:
            cluster.save(session=session)
        except db_exc.DBDuplicateEntry as e:
            raise ex.DBDuplicateEntry(
                _("Duplicate entry for Cluster: %s") % e.columns)

        try:
            for ng in node_groups:
                node_group = m.NodeGroup()
                node_group.update({"cluster_id": cluster.id})
                node_group.update(ng)
                node_group.save(session=session)
        except db_exc.DBDuplicateEntry as e:
            raise ex.DBDuplicateEntry(
                _("Duplicate entry for NodeGroup: %s") % e.columns)

    return cluster_get(context, cluster.id)
Example #25
def cluster_template_create(context, values):
    values = values.copy()
    cluster_template = m.ClusterTemplate()
    node_groups = values.pop("node_groups") or []
    cluster_template.update(values)

    session = get_session()
    with session.begin():
        try:
            cluster_template.save(session=session)
        except db_exc.DBDuplicateEntry as e:
            raise ex.DBDuplicateEntry(
                _("Duplicate entry for ClusterTemplate: %s") % e.columns)

        try:
            for ng in node_groups:
                node_group = m.TemplatesRelation()
                node_group.update({"cluster_template_id": cluster_template.id})
                node_group.update(ng)
                node_group.save(session=session)

        except db_exc.DBDuplicateEntry as e:
            raise ex.DBDuplicateEntry(
                _("Duplicate entry for TemplatesRelation: %s") % e.columns)

    return cluster_template_get(context, cluster_template.id)
Example #26
    def _await_cldb(self, cluster_context, instances=None, timeout=600):
        instances = instances or cluster_context.get_instances()
        cldb_node = cluster_context.get_instance(mfs.CLDB)
        start_time = timeutils.utcnow()
        retry_count = 0
        with cldb_node.remote() as r:
            LOG.debug("Waiting {count} seconds for CLDB initialization".format(
                count=timeout))
            while timeutils.delta_seconds(start_time,
                                          timeutils.utcnow()) < timeout:
                ec, out = r.execute_command(NODE_LIST_CMD,
                                            raise_when_error=False)
                resp = json.loads(out)
                status = resp['status']
                if str(status).lower() == 'ok':
                    ips = [n['ip'] for n in resp['data']]
                    retry_count += 1
                    for i in instances:
                        if (i.management_ip not in ips
                                and retry_count > DEFAULT_RETRY_COUNT):
                            raise ex.HadoopProvisionError(_(
                                "Node failed to connect to CLDB: %s") %
                                i.management_ip)
                    break
                else:
                    context.sleep(DELAY)
            else:
                raise ex.HadoopProvisionError(_("CLDB failed to start"))
Example #27
def validate_number_of_datanodes(cluster, scaled_groups, default_configs):
    dfs_replication = 0
    for config in default_configs:
        if config.name == 'dfs.replication':
            dfs_replication = config.default_value
    conf = cluster.cluster_configs
    if 'HDFS' in conf and 'dfs.replication' in conf['HDFS']:
        dfs_replication = conf['HDFS']['dfs.replication']

    if not scaled_groups:
        scaled_groups = {}
    dn_count = 0
    for ng in cluster.node_groups:
        if 'DATANODE' in ng.node_processes:
            if ng.id in scaled_groups:
                dn_count += scaled_groups[ng.id]
            else:
                dn_count += ng.count

    if dn_count < int(dfs_replication):
        raise ex.InvalidComponentCountException(
            'datanode', _('%s or more') % dfs_replication, dn_count,
            _('Number of %(dn)s instances should not be less '
              'than %(replication)s')
            % {'dn': 'DATANODE', 'replication': 'dfs.replication'})
Example #28
def node_group_template_update(context, values, ignore_default=False):
    session = get_session()
    try:
        with session.begin():
            ngt_id = values['id']
            ngt = _node_group_template_get(context, session, ngt_id)
            if not ngt:
                raise ex.NotFoundException(
                    ngt_id, _("NodeGroupTemplate id '%s' not found"))
            elif not ignore_default and ngt.is_default:
                raise ex.UpdateFailedException(
                    ngt_id,
                    _("NodeGroupTemplate id '%s' can not be updated. "
                      "It is a default template.")
                )

            # Check to see that the node group template to be updated is not in
            # use by an existing cluster.
            for template_relationship in ngt.templates_relations:
                if len(template_relationship.cluster_template.clusters) > 0:
                    raise ex.UpdateFailedException(
                        ngt_id,
                        _("NodeGroupTemplate id '%s' can not be updated. "
                          "It is referenced by an existing cluster.")
                    )

            ngt.update(values)
    except db_exc.DBDuplicateEntry as e:
        raise ex.DBDuplicateEntry(
            _("Duplicate entry for NodeGroupTemplate: %s") % e.columns)

    return ngt
Example #29
def proxy_user_delete(username=None, user_id=None):
    '''Delete the user from the proxy domain.

    :param username: The name of the user to delete.
    :param user_id: The id of the user to delete, if provided this overrides
                    the username.
    :raises NotFoundException: If there is an error locating the user in the
                               proxy domain.

    '''
    admin = k.client_for_admin()
    if not user_id:
        domain = domain_for_proxy()
        user_list = b.execute_with_retries(
            admin.users.list, domain=domain.id, name=username)
        if len(user_list) == 0:
            raise ex.NotFoundException(
                value=username,
                message_template=_('Failed to find user %s'))
        if len(user_list) > 1:
            raise ex.NotFoundException(
                value=username,
                message_template=_('Unexpected results found when searching '
                                   'for user %s'))
        user_id = user_list[0].id
    b.execute_with_retries(admin.users.delete, user_id)
    LOG.debug('Deleted proxy user id {user_id}'.format(user_id=user_id))
Example #30
def suspend_job(job_execution_id):
    ctx = context.ctx()
    job_execution = conductor.job_execution_get(ctx, job_execution_id)
    if job_execution.info['status'] not in edp.JOB_STATUSES_SUSPENDIBLE:
        raise e.SuspendingFailed(
            _("Suspending operation cannot be performed "
              "on status: {status}").format(
                  status=job_execution.info['status']))
    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    engine = get_job_engine(cluster, job_execution)
    job_execution = conductor.job_execution_update(
        ctx, job_execution_id, {
            'info': {'status': edp.JOB_STATUS_TOBESUSPENDED}})
    try:
        job_info = engine.suspend_job(job_execution)
    except Exception as ex:
        job_info = None
        conductor.job_execution_update(
            ctx, job_execution_id, {'info': {
                'status': edp.JOB_STATUS_SUSPEND_FAILED}})
        raise e.SuspendingFailed(
            _("Error during suspending of job execution: "
              "{error}").format(error=ex))
    if job_info is not None:
        job_execution = _write_job_status(job_execution, job_info)
        LOG.info("Job execution was suspended successfully")
        return job_execution

    conductor.job_execution_update(
        ctx, job_execution_id, {'info': {
            'status': edp.JOB_STATUS_SUSPEND_FAILED}})
    raise e.SuspendingFailed(
        _("Failed to suspend job execution "
          "{jid}").format(jid=job_execution_id))
Example #31
def validate_cluster_creating(pctx, cluster):
    nn_count = _get_inst_count(cluster, 'namenode')
    if nn_count != 1:
        raise ex.InvalidComponentCountException('namenode', 1, nn_count)

    snn_count = _get_inst_count(cluster, 'secondarynamenode')
    if snn_count > 1:
        raise ex.InvalidComponentCountException('secondarynamenode',
                                                _('0 or 1'), snn_count)

    rm_count = _get_inst_count(cluster, 'resourcemanager')
    if rm_count > 1:
        raise ex.InvalidComponentCountException('resourcemanager', _('0 or 1'),
                                                rm_count)

    hs_count = _get_inst_count(cluster, 'historyserver')
    if hs_count > 1:
        raise ex.InvalidComponentCountException('historyserver', _('0 or 1'),
                                                hs_count)

    nm_count = _get_inst_count(cluster, 'nodemanager')
    if rm_count == 0:
        if nm_count > 0:
            raise ex.RequiredServiceMissingException('resourcemanager',
                                                     required_by='nodemanager')

    oo_count = _get_inst_count(cluster, 'oozie')
    dn_count = _get_inst_count(cluster, 'datanode')
    if oo_count > 1:
        raise ex.InvalidComponentCountException('oozie', _('0 or 1'), oo_count)

    if oo_count == 1:
        if dn_count < 1:
            raise ex.RequiredServiceMissingException('datanode',
                                                     required_by='oozie')

        if nm_count < 1:
            raise ex.RequiredServiceMissingException('nodemanager',
                                                     required_by='oozie')

        if hs_count != 1:
            raise ex.RequiredServiceMissingException('historyserver',
                                                     required_by='oozie')

    spark_hist_count = _get_inst_count(cluster, 'spark history server')
    if spark_hist_count > 1:
        raise ex.InvalidComponentCountException('spark history server',
                                                _('0 or 1'), spark_hist_count)

    rep_factor = cu.get_config_value(pctx, 'HDFS', 'dfs.replication', cluster)
    if dn_count < rep_factor:
        raise ex.InvalidComponentCountException(
            'datanode', rep_factor, dn_count,
            _('Number of datanodes must not be '
              'less than dfs.replication.'))

    hive_count = _get_inst_count(cluster, 'hiveserver')
    if hive_count > 1:
        raise ex.InvalidComponentCountException('hive', _('0 or 1'),
                                                hive_count)
Example #32
def ctx():
    if not has_ctx():
        raise ex.IncorrectStateError(_("Context isn't available here"))
    return getattr(_CTX_STORE, _CTX_KEY)
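ctx() reads the current context from a thread-local store. A minimal sketch of such a store with threading.local; the names are illustrative, not Sahara's exact module layout:

import threading

_CTX_STORE = threading.local()
_CTX_KEY = 'current_context'


def set_ctx(value):
    setattr(_CTX_STORE, _CTX_KEY, value)


def has_ctx():
    return getattr(_CTX_STORE, _CTX_KEY, None) is not None


def ctx():
    if not has_ctx():
        raise RuntimeError("Context isn't available here")
    return getattr(_CTX_STORE, _CTX_KEY)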
Example #33
        CU.pu.install_packages(instances, PACKAGES)

    CU.pu.start_cloudera_agents(instances)
    CU.pu.start_cloudera_manager(cluster)
    CU.update_cloudera_password(cluster)
    CU.configure_rack_awareness(cluster)
    CU.await_agents(cluster, instances)
    CU.create_mgmt_service(cluster)
    CU.create_services(cluster)
    CU.configure_services(cluster)
    CU.configure_instances(instances, cluster)
    CU.deploy_configs(cluster)


@cpo.event_wrapper(True,
                   step=_("Start roles: NODEMANAGER, DATANODE"),
                   param=('cluster', 0))
def _start_roles(cluster, instances):
    for instance in instances:
        if 'HDFS_DATANODE' in instance.node_group.node_processes:
            hdfs = CU.get_service_by_role('DATANODE', instance=instance)
            CU.start_roles(hdfs, CU.pu.get_role_name(instance, 'DATANODE'))

        if 'YARN_NODEMANAGER' in instance.node_group.node_processes:
            yarn = CU.get_service_by_role('NODEMANAGER', instance=instance)
            CU.start_roles(yarn, CU.pu.get_role_name(instance, 'NODEMANAGER'))


def scale_cluster(cluster, instances):
    if not instances:
        return
Example #34
    def get_description(self):
        return _("This plugin provides an ability to launch Spark on Hadoop "
                 "CDH cluster without any management consoles.")
Example #35
    def __init__(self, heat_stack_status):
        self.code = "HEAT_STACK_EXCEPTION"
        self.message = (_("Heat stack failed with status %s") %
                        heat_stack_status)
        super(HeatStackException, self).__init__()
Example #36
    def __init__(self, feature):
        self.message = _("Feature '%s' is not implemented") % feature
        super(NotImplementedException, self).__init__()
Example #37
    def __init__(self, ng_name):
        self.message = _("Node Group %s is missing 'floating_ip_pool' "
                         "field") % ng_name
        self.code = "MISSING_FLOATING_NETWORK"
        super(MissingFloatingNetworkException, self).__init__()
Example #38
class ClouderaUtils(object):
    CM_DEFAULT_USERNAME = 'admin'
    CM_DEFAULT_PASSWD = 'admin'
    CM_API_VERSION = 8

    HDFS_SERVICE_NAME = 'hdfs01'
    YARN_SERVICE_NAME = 'yarn01'
    OOZIE_SERVICE_NAME = 'oozie01'
    HIVE_SERVICE_NAME = 'hive01'
    HUE_SERVICE_NAME = 'hue01'
    SPARK_SERVICE_NAME = 'spark_on_yarn01'
    ZOOKEEPER_SERVICE_NAME = 'zookeeper01'
    HBASE_SERVICE_NAME = 'hbase01'

    FLUME_SERVICE_NAME = 'flume01'
    SOLR_SERVICE_NAME = 'solr01'
    SQOOP_SERVICE_NAME = 'sqoop01'
    KS_INDEXER_SERVICE_NAME = 'ks_indexer01'
    IMPALA_SERVICE_NAME = 'impala01'
    SENTRY_SERVICE_NAME = 'sentry01'
    KMS_SERVICE_NAME = 'kms01'
    KAFKA_SERVICE_NAME = 'kafka01'
    NAME_SERVICE = 'nameservice01'

    def __init__(self):
        self.pu = plugin_utils.AbstractPluginUtils()
        self.validator = validation.Validator
        self.c_helper = None

    def get_api_client_by_default_password(self, cluster):
        manager_ip = self.pu.get_manager(cluster).management_ip
        return api_client.ApiResource(manager_ip,
                                      username=self.CM_DEFAULT_USERNAME,
                                      password=self.CM_DEFAULT_PASSWD,
                                      version=self.CM_API_VERSION)

    def get_api_client(self, cluster, api_version=None):
        manager_ip = self.pu.get_manager(cluster).management_ip
        cm_password = dh.get_cm_password(cluster)
        version = self.CM_API_VERSION if not api_version else api_version
        return api_client.ApiResource(manager_ip,
                                      username=self.CM_DEFAULT_USERNAME,
                                      password=cm_password,
                                      version=version)

    def update_cloudera_password(self, cluster):
        api = self.get_api_client_by_default_password(cluster)
        user = api.get_user(self.CM_DEFAULT_USERNAME)
        user.password = dh.get_cm_password(cluster)
        api.update_user(user)

    def get_cloudera_cluster(self, cluster):
        api = self.get_api_client(cluster)
        return api.get_cluster(cluster.name)

    @cloudera_cmd
    def start_cloudera_cluster(self, cluster):
        cm_cluster = self.get_cloudera_cluster(cluster)
        yield cm_cluster.start()

    @cloudera_cmd
    def stop_cloudera_cluster(self, cluster):
        cm_cluster = self.get_cloudera_cluster(cluster)
        yield cm_cluster.stop()

    def start_instances(self, cluster):
        self.start_cloudera_cluster(cluster)

    @cpo.event_wrapper(True, step=_("Delete instances"), param=('cluster', 1))
    def delete_instances(self, cluster, instances):
        api = self.get_api_client(cluster)
        cm_cluster = self.get_cloudera_cluster(cluster)
        hosts = api.get_all_hosts(view='full')
        hostsnames_to_deleted = [i.fqdn() for i in instances]
        for host in hosts:
            if host.hostname in hostsnames_to_deleted:
                cm_cluster.remove_host(host.hostId)
                api.delete_host(host.hostId)

    @cpo.event_wrapper(
        True, step=_("Decommission nodes"), param=('cluster', 1))
    def decommission_nodes(self, cluster, process,
                           decommission_roles, roles_to_delete=None):
        service = self.get_service_by_role(process, cluster)
        service.decommission(*decommission_roles).wait()
        # not all roles should be decommissioned
        if roles_to_delete:
            decommission_roles.extend(roles_to_delete)
        for role_name in decommission_roles:
            service.delete_role(role_name)

    @cpo.event_wrapper(
        True, step=_("Refresh DataNodes"), param=('cluster', 1))
    def refresh_datanodes(self, cluster):
        self._refresh_nodes(cluster, 'DATANODE', self.HDFS_SERVICE_NAME)

    @cpo.event_wrapper(
        True, step=_("Refresh YARNNodes"), param=('cluster', 1))
    def refresh_yarn_nodes(self, cluster):
        self._refresh_nodes(cluster, 'NODEMANAGER', self.YARN_SERVICE_NAME)

    @cloudera_cmd
    def _refresh_nodes(self, cluster, process, service_name):
        cm_cluster = self.get_cloudera_cluster(cluster)
        service = cm_cluster.get_service(service_name)
        nds = [n.name for n in service.get_roles_by_type(process)]
        for nd in nds:
            for st in service.refresh(nd):
                yield st

    @cpo.event_wrapper(
        True, step=_("Restart stale services"), param=('cluster', 1))
    @cloudera_cmd
    def restart_stale_services(self, cluster):
        cm_cluster = self.get_cloudera_cluster(cluster)
        yield cm_cluster.restart(
            restart_only_stale_services=True,
            redeploy_client_configuration=True)

    @cpo.event_wrapper(True, step=_("Deploy configs"), param=('cluster', 1))
    @cloudera_cmd
    def deploy_configs(self, cluster):
        cm_cluster = self.get_cloudera_cluster(cluster)
        yield cm_cluster.deploy_client_config()

    def update_configs(self, instances):
        # instances non-empty
        cpo.add_provisioning_step(
            instances[0].cluster_id, _("Update configs"), len(instances))
        with context.ThreadGroup() as tg:
            for instance in instances:
                tg.spawn("update-configs-%s" % instance.instance_name,
                         self._update_configs, instance)
                context.sleep(1)

    @cpo.event_wrapper(True)
    @cloudera_cmd
    def _update_configs(self, instance):
        for process in instance.node_group.node_processes:
            process = self.pu.convert_role_showname(process)
            service = self.get_service_by_role(process, instance=instance)
            yield service.deploy_client_config(self.pu.get_role_name(instance,
                                                                     process))

    def get_mgmt_service(self, cluster):
        api = self.get_api_client(cluster)
        cm = api.get_cloudera_manager()
        mgmt_service = cm.get_service()
        return mgmt_service

    @cloudera_cmd
    def restart_mgmt_service(self, cluster):
        service = self.get_mgmt_service(cluster)
        yield service.restart()

    @cloudera_cmd
    def start_service(self, service):
        yield service.start()

    @cloudera_cmd
    def stop_service(self, service):
        yield service.stop()

    @cloudera_cmd
    def start_roles(self, service, *role_names):
        for role in service.start_roles(*role_names):
            yield role

    @cpo.event_wrapper(
        True, step=_("Create mgmt service"), param=('cluster', 1))
    def create_mgmt_service(self, cluster):
        api = self.get_api_client(cluster)
        cm = api.get_cloudera_manager()

        setup_info = services.ApiServiceSetupInfo()
        manager = self.pu.get_manager(cluster)
        hostname = manager.fqdn()
        processes = ['SERVICEMONITOR', 'HOSTMONITOR',
                     'EVENTSERVER', 'ALERTPUBLISHER']
        for proc in processes:
            setup_info.add_role_info(self.pu.get_role_name(manager, proc),
                                     proc, hostname)

        cm.create_mgmt_service(setup_info)
        cm.hosts_start_roles([hostname])

    def get_service_by_role(self, role, cluster=None, instance=None):
        if cluster:
            cm_cluster = self.get_cloudera_cluster(cluster)
        elif instance:
            cm_cluster = self.get_cloudera_cluster(instance.cluster)
        else:
            raise ValueError(_("'cluster' or 'instance' argument is missing"))

        if role in ['NAMENODE', 'DATANODE', 'SECONDARYNAMENODE',
                    'HDFS_GATEWAY']:
            return cm_cluster.get_service(self.HDFS_SERVICE_NAME)
        elif role in ['RESOURCEMANAGER', 'NODEMANAGER', 'JOBHISTORY',
                      'YARN_GATEWAY']:
            return cm_cluster.get_service(self.YARN_SERVICE_NAME)
        elif role in ['OOZIE_SERVER']:
            return cm_cluster.get_service(self.OOZIE_SERVICE_NAME)
        elif role in ['HIVESERVER2', 'HIVEMETASTORE', 'WEBHCAT']:
            return cm_cluster.get_service(self.HIVE_SERVICE_NAME)
        elif role in ['HUE_SERVER']:
            return cm_cluster.get_service(self.HUE_SERVICE_NAME)
        elif role in ['SPARK_YARN_HISTORY_SERVER']:
            return cm_cluster.get_service(self.SPARK_SERVICE_NAME)
        elif role in ['SERVER']:
            return cm_cluster.get_service(self.ZOOKEEPER_SERVICE_NAME)
        elif role in ['MASTER', 'REGIONSERVER']:
            return cm_cluster.get_service(self.HBASE_SERVICE_NAME)
        elif role in ['AGENT']:
            return cm_cluster.get_service(self.FLUME_SERVICE_NAME)
        elif role in ['SENTRY_SERVER']:
            return cm_cluster.get_service(self.SENTRY_SERVICE_NAME)
        elif role in ['SQOOP_SERVER']:
            return cm_cluster.get_service(self.SQOOP_SERVICE_NAME)
        elif role in ['SOLR_SERVER']:
            return cm_cluster.get_service(self.SOLR_SERVICE_NAME)
        elif role in ['HBASE_INDEXER']:
            return cm_cluster.get_service(self.KS_INDEXER_SERVICE_NAME)
        elif role in ['CATALOGSERVER', 'STATESTORE', 'IMPALAD', 'LLAMA']:
            return cm_cluster.get_service(self.IMPALA_SERVICE_NAME)
        elif role in ['KMS']:
            return cm_cluster.get_service(self.KMS_SERVICE_NAME)
        elif role in ['JOURNALNODE']:
            return cm_cluster.get_service(self.HDFS_SERVICE_NAME)
        elif role in ['YARN_STANDBYRM']:
            return cm_cluster.get_service(self.YARN_SERVICE_NAME)
        elif role in ['KAFKA_BROKER']:
            return cm_cluster.get_service(self.KAFKA_SERVICE_NAME)
        else:
            raise ValueError(
                _("Process %(process)s is not supported by CDH plugin") %
                {'process': role})

    @cpo.event_wrapper(
        True, step=_("First run cluster"), param=('cluster', 1))
    @cloudera_cmd
    def first_run(self, cluster):
        cm_cluster = self.get_cloudera_cluster(cluster)
        yield cm_cluster.first_run()

    @cpo.event_wrapper(True, step=_("Create services"), param=('cluster', 1))
    def create_services(self, cluster):
        api = self.get_api_client(cluster)
        cm_cluster = api.create_cluster(cluster.name,
                                        fullVersion=cluster.hadoop_version)

        if len(self.pu.get_zookeepers(cluster)) > 0:
            cm_cluster.create_service(self.ZOOKEEPER_SERVICE_NAME,
                                      ZOOKEEPER_SERVICE_TYPE)
        cm_cluster.create_service(self.HDFS_SERVICE_NAME, HDFS_SERVICE_TYPE)
        cm_cluster.create_service(self.YARN_SERVICE_NAME, YARN_SERVICE_TYPE)
        cm_cluster.create_service(self.OOZIE_SERVICE_NAME, OOZIE_SERVICE_TYPE)
        if self.pu.get_hive_metastore(cluster):
            cm_cluster.create_service(self.HIVE_SERVICE_NAME,
                                      HIVE_SERVICE_TYPE)
        if self.pu.get_hue(cluster):
            cm_cluster.create_service(self.HUE_SERVICE_NAME, HUE_SERVICE_TYPE)
        if self.pu.get_spark_historyserver(cluster):
            cm_cluster.create_service(self.SPARK_SERVICE_NAME,
                                      SPARK_SERVICE_TYPE)
        if self.pu.get_hbase_master(cluster):
            cm_cluster.create_service(self.HBASE_SERVICE_NAME,
                                      HBASE_SERVICE_TYPE)
        if len(self.pu.get_flumes(cluster)) > 0:
            cm_cluster.create_service(self.FLUME_SERVICE_NAME,
                                      FLUME_SERVICE_TYPE)
        if self.pu.get_sentry(cluster):
            cm_cluster.create_service(self.SENTRY_SERVICE_NAME,
                                      SENTRY_SERVICE_TYPE)
        if len(self.pu.get_solrs(cluster)) > 0:
            cm_cluster.create_service(self.SOLR_SERVICE_NAME,
                                      SOLR_SERVICE_TYPE)
        if self.pu.get_sqoop(cluster):
            cm_cluster.create_service(self.SQOOP_SERVICE_NAME,
                                      SQOOP_SERVICE_TYPE)
        if len(self.pu.get_hbase_indexers(cluster)) > 0:
            cm_cluster.create_service(self.KS_INDEXER_SERVICE_NAME,
                                      KS_INDEXER_SERVICE_TYPE)
        if self.pu.get_catalogserver(cluster):
            cm_cluster.create_service(self.IMPALA_SERVICE_NAME,
                                      IMPALA_SERVICE_TYPE)
        if self.pu.get_kms(cluster):
            cm_cluster.create_service(self.KMS_SERVICE_NAME,
                                      KMS_SERVICE_TYPE)
        if len(self.pu.get_kafka_brokers(cluster)) > 0:
            cm_cluster.create_service(self.KAFKA_SERVICE_NAME,
                                      KAFKA_SERVICE_TYPE)

    def _agents_connected(self, instances, api):
        hostnames = [i.fqdn() for i in instances]
        hostnames_to_manager = [h.hostname for h in
                                api.get_all_hosts('full')]
        for hostname in hostnames:
            if hostname not in hostnames_to_manager:
                return False
        return True

    @cpo.event_wrapper(True, step=_("Await agents"), param=('cluster', 1))
    def _await_agents(self, cluster, instances, timeout_config):
        api = self.get_api_client(instances[0].cluster)
        poll_utils.plugin_option_poll(
            cluster, self._agents_connected, timeout_config,
            _("Await Cloudera agents"), 5, {
                'instances': instances, 'api': api})

    def await_agents(self, cluster, instances):
        self._await_agents(cluster, instances,
                           self.c_helper.AWAIT_AGENTS_TIMEOUT)

    @cpo.event_wrapper(
        True, step=_("Configure services"), param=('cluster', 1))
    def configure_services(self, cluster):
        cm_cluster = self.get_cloudera_cluster(cluster)

        if len(self.pu.get_zookeepers(cluster)) > 0:
            zookeeper = cm_cluster.get_service(self.ZOOKEEPER_SERVICE_NAME)
            zookeeper.update_config(self._get_configs(ZOOKEEPER_SERVICE_TYPE,
                                                      cluster=cluster))

        hdfs = cm_cluster.get_service(self.HDFS_SERVICE_NAME)
        hdfs.update_config(self._get_configs(HDFS_SERVICE_TYPE,
                                             cluster=cluster))

        yarn = cm_cluster.get_service(self.YARN_SERVICE_NAME)
        yarn.update_config(self._get_configs(YARN_SERVICE_TYPE,
                                             cluster=cluster))

        oozie = cm_cluster.get_service(self.OOZIE_SERVICE_NAME)
        oozie.update_config(self._get_configs(OOZIE_SERVICE_TYPE,
                                              cluster=cluster))

        if self.pu.get_hive_metastore(cluster):
            hive = cm_cluster.get_service(self.HIVE_SERVICE_NAME)
            hive.update_config(self._get_configs(HIVE_SERVICE_TYPE,
                                                 cluster=cluster))

        if self.pu.get_hue(cluster):
            hue = cm_cluster.get_service(self.HUE_SERVICE_NAME)
            hue.update_config(self._get_configs(HUE_SERVICE_TYPE,
                                                cluster=cluster))

        if self.pu.get_spark_historyserver(cluster):
            spark = cm_cluster.get_service(self.SPARK_SERVICE_NAME)
            spark.update_config(self._get_configs(SPARK_SERVICE_TYPE,
                                                  cluster=cluster))

        if self.pu.get_hbase_master(cluster):
            hbase = cm_cluster.get_service(self.HBASE_SERVICE_NAME)
            hbase.update_config(self._get_configs(HBASE_SERVICE_TYPE,
                                                  cluster=cluster))

        if len(self.pu.get_flumes(cluster)) > 0:
            flume = cm_cluster.get_service(self.FLUME_SERVICE_NAME)
            flume.update_config(self._get_configs(FLUME_SERVICE_TYPE,
                                                  cluster=cluster))

        if self.pu.get_sentry(cluster):
            sentry = cm_cluster.get_service(self.SENTRY_SERVICE_NAME)
            sentry.update_config(self._get_configs(SENTRY_SERVICE_TYPE,
                                                   cluster=cluster))

        if len(self.pu.get_solrs(cluster)) > 0:
            solr = cm_cluster.get_service(self.SOLR_SERVICE_NAME)
            solr.update_config(self._get_configs(SOLR_SERVICE_TYPE,
                                                 cluster=cluster))

        if self.pu.get_sqoop(cluster):
            sqoop = cm_cluster.get_service(self.SQOOP_SERVICE_NAME)
            sqoop.update_config(self._get_configs(SQOOP_SERVICE_TYPE,
                                                  cluster=cluster))

        if len(self.pu.get_hbase_indexers(cluster)) > 0:
            ks_indexer = cm_cluster.get_service(self.KS_INDEXER_SERVICE_NAME)
            ks_indexer.update_config(
                self._get_configs(KS_INDEXER_SERVICE_TYPE, cluster=cluster))

        if self.pu.get_catalogserver(cluster):
            impala = cm_cluster.get_service(self.IMPALA_SERVICE_NAME)
            impala.update_config(self._get_configs(IMPALA_SERVICE_TYPE,
                                                   cluster=cluster))

        if self.pu.get_kms(cluster):
            kms = cm_cluster.get_service(self.KMS_SERVICE_NAME)
            kms.update_config(self._get_configs(KMS_SERVICE_TYPE,
                                                cluster=cluster))
        if len(self.pu.get_kafka_brokers(cluster)) > 0:
            kafka = cm_cluster.get_service(self.KAFKA_SERVICE_NAME)
            kafka.update_config(self._get_configs(KAFKA_SERVICE_TYPE,
                                                  cluster=cluster))

    def configure_instances(self, instances, cluster=None):
        # instances non-empty
        cpo.add_provisioning_step(
            instances[0].cluster_id, _("Configure instances"), len(instances))
        for inst in instances:
            self.configure_instance(inst, cluster)

    def get_roles_list(self, node_processes):
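        # Gateway roles are implicit: a node running YARN_NODEMANAGER also
        # gets YARN_GATEWAY, and a node running any HDFS daemon also gets
        # HDFS_GATEWAY.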
        current = set(node_processes)
        extra_roles = {
            'YARN_GATEWAY': ["YARN_NODEMANAGER"],
            'HDFS_GATEWAY': ['HDFS_NAMENODE', 'HDFS_DATANODE',
                             "HDFS_SECONDARYNAMENODE"]
        }
        for extra_role in six.iterkeys(extra_roles):
            valid_processes = extra_roles[extra_role]
            for valid in valid_processes:
                if valid in current:
                    current.add(extra_role)
                    break
        return list(current)

    def get_role_type(self, process):
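        # Both gateway pseudo-processes map to the generic CM "GATEWAY" role
        # type; any other process name is already a valid role type.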
        mapper = {
            'YARN_GATEWAY': 'GATEWAY',
            'HDFS_GATEWAY': 'GATEWAY',
        }
        return mapper.get(process, process)

    @cpo.event_wrapper(True)
    def configure_instance(self, instance, cluster=None):
        roles_list = self.get_roles_list(instance.node_group.node_processes)
        for role in roles_list:
            self._add_role(instance, role, cluster)

    def _add_role(self, instance, process, cluster):
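        # The manager itself and the HA-only processes are not created as
        # regular roles here; HA roles are added by the enable_*_ha calls.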
        if process in ['CLOUDERA_MANAGER', 'HDFS_JOURNALNODE',
                       'YARN_STANDBYRM']:
            return

        process = self.pu.convert_role_showname(process)
        service = self.get_service_by_role(process, instance=instance)
        role_type = self.get_role_type(process)
        role = service.create_role(self.pu.get_role_name(instance, process),
                                   role_type, instance.fqdn())
        role.update_config(self._get_configs(process, cluster,
                                             instance=instance))

    @cloudera_cmd
    def restart_service(self, process, instance):
        service = self.get_service_by_role(process, instance=instance)
        yield service.restart()

    def update_role_config(self, instance, process):
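        # Find the CM host record backing this instance, update the config of
        # the matching role(s) and restart the owning service.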
        process = self.pu.convert_role_showname(process)
        service = self.get_service_by_role(process, instance=instance)
        api = self.get_api_client(instance.cluster)
        hosts = api.get_all_hosts(view='full')
        ihost_id = None
        for host in hosts:
            if instance.fqdn() == host.hostname:
                ihost_id = host.hostId
                break
        role_type = self.get_role_type(process)
        roles = service.get_roles_by_type(role_type)
        for role in roles:
            if role.hostRef.hostId == ihost_id:
                role.update_config(
                    self._get_configs(role_type, instance=instance))
        self.restart_service(process, instance)

    @cloudera_cmd
    def import_admin_credentials(self, cm, username, password):
        yield cm.import_admin_credentials(username, password)

    @cloudera_cmd
    def configure_for_kerberos(self, cluster):
        api = self.get_api_client(cluster, api_version=11)
        cluster = api.get_cluster(cluster.name)
        yield cluster.configure_for_kerberos()

    def push_kerberos_configs(self, cluster):
        manager = self.pu.get_manager(cluster)
        kdc_host = kerberos.get_kdc_host(cluster, manager)
        security_realm = kerberos.get_realm_name(cluster)
        username = "******" % (kerberos.get_admin_principal(cluster),
                              kerberos.get_realm_name(cluster))
        password = kerberos.get_server_password(cluster)

        api = self.get_api_client(cluster)
        cm = api.get_cloudera_manager()
        cm.update_config({'SECURITY_REALM': security_realm,
                          'KDC_HOST': kdc_host})

        self.import_admin_credentials(cm, username, password)
        self.configure_for_kerberos(cluster)
        self.deploy_configs(cluster)

    def configure_rack_awareness(self, cluster):
        if t_helper.is_data_locality_enabled():
            self._configure_rack_awareness(cluster)

    @cpo.event_wrapper(
        True, step=_("Configure rack awareness"), param=('cluster', 1))
    def _configure_rack_awareness(self, cluster):
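        # Push the generated "IP address -> rack id" mapping into every host
        # record known to Cloudera Manager.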
        api = self.get_api_client(cluster)
        topology = t_helper.generate_topology_map(
            cluster, is_node_awareness=False)
        for host in api.get_all_hosts():
            host.rackId = topology[host.ipAddress]
            host.put_host()

    def full_cluster_stop(self, cluster):
        self.stop_cloudera_cluster(cluster)
        mgmt = self.get_mgmt_service(cluster)
        self.stop_service(mgmt)

    def full_cluster_start(self, cluster):
        self.start_cloudera_cluster(cluster)
        mgmt = self.get_mgmt_service(cluster)
        self.start_service(mgmt)

    def get_cloudera_manager_info(self, cluster):
        mng = self.pu.get_manager(cluster)
        info = {
            'Cloudera Manager': {
                'Web UI': 'http://%s:7180' % mng.get_ip_or_dns_name(),
                'Username': 'admin',
                'Password': dh.get_cm_password(cluster)
            }
        }
        return info

    @cpo.event_wrapper(
        True, step=_("Enable NameNode HA"), param=('cluster', 1))
    @cloudera_cmd
    def enable_namenode_ha(self, cluster):
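        # The former secondary NameNode host becomes the standby NameNode,
        # and every JournalNode is registered with a shared edits directory.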
        standby_nn = self.pu.get_secondarynamenode(cluster)
        standby_nn_host_name = standby_nn.fqdn()
        jns = self.pu.get_jns(cluster)
        jn_list = []
        for index, jn in enumerate(jns):
            jn_host_name = jn.fqdn()
            jn_list.append({'jnHostId': jn_host_name,
                            'jnName': 'JN%i' % index,
                            'jnEditsDir': '/dfs/jn'
                            })
        cm_cluster = self.get_cloudera_cluster(cluster)
        hdfs = cm_cluster.get_service(self.HDFS_SERVICE_NAME)
        nn = hdfs.get_roles_by_type('NAMENODE')[0]

        yield hdfs.enable_nn_ha(active_name=nn.name,
                                standby_host_id=standby_nn_host_name,
                                nameservice=self.NAME_SERVICE, jns=jn_list
                                )

    @cpo.event_wrapper(
        True, step=_("Enable ResourceManager HA"), param=('cluster', 1))
    @cloudera_cmd
    def enable_resourcemanager_ha(self, cluster):
        new_rm = self.pu.get_stdb_rm(cluster)
        new_rm_host_name = new_rm.fqdn()
        cm_cluster = self.get_cloudera_cluster(cluster)
        yarn = cm_cluster.get_service(self.YARN_SERVICE_NAME)
        yield yarn.enable_rm_ha(new_rm_host_id=new_rm_host_name)

    def _get_configs(self, service, cluster=None, instance=None):
        def get_hadoop_dirs(mount_points, suffix):
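            # e.g. mount points ['/data1', '/data2'] with suffix '/fs/nn'
            # yield '/data1/fs/nn,/data2/fs/nn' (paths are illustrative).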
            return ','.join([x + suffix for x in mount_points])

        all_confs = {}
        if cluster:
            zk_count = self.validator.get_inst_count(cluster,
                                                     'ZOOKEEPER_SERVER')
            hbm_count = self.validator.get_inst_count(cluster, 'HBASE_MASTER')
            snt_count = self.validator.get_inst_count(cluster,
                                                      'SENTRY_SERVER')
            ks_count = self.validator.get_inst_count(
                cluster, 'KEY_VALUE_STORE_INDEXER')
            kms_count = self.validator.get_inst_count(cluster, 'KMS')
            imp_count = self.validator.get_inst_count(
                cluster, 'IMPALA_CATALOGSERVER')
            hive_count = self.validator.get_inst_count(cluster,
                                                       'HIVE_METASTORE')
            slr_count = self.validator.get_inst_count(cluster, 'SOLR_SERVER')
            sqp_count = self.validator.get_inst_count(cluster, 'SQOOP_SERVER')
            core_site_safety_valve = ''
            if self.pu.c_helper.is_swift_enabled(cluster):
                configs = swift_helper.get_swift_configs()
                confs = {c['name']: c['value'] for c in configs}
                core_site_safety_valve = xmlutils.create_elements_xml(confs)
            all_confs = {
                'HDFS': {
                    'zookeeper_service':
                        self.ZOOKEEPER_SERVICE_NAME if zk_count else '',
                    'dfs_block_local_path_access_user':
                        'impala' if imp_count else '',
                    'kms_service': self.KMS_SERVICE_NAME if kms_count else '',
                    'core_site_safety_valve': core_site_safety_valve
                },
                'HIVE': {
                    'mapreduce_yarn_service': self.YARN_SERVICE_NAME,
                    'sentry_service':
                        self.SENTRY_SERVICE_NAME if snt_count else '',
                    'zookeeper_service':
                        self.ZOOKEEPER_SERVICE_NAME if zk_count else ''
                },
                'OOZIE': {
                    'mapreduce_yarn_service': self.YARN_SERVICE_NAME,
                    'hive_service':
                        self.HIVE_SERVICE_NAME if hive_count else '',
                    'zookeeper_service':
                        self.ZOOKEEPER_SERVICE_NAME if zk_count else ''
                },
                'YARN': {
                    'hdfs_service': self.HDFS_SERVICE_NAME,
                    'zookeeper_service':
                        self.ZOOKEEPER_SERVICE_NAME if zk_count else ''
                },
                'HUE': {
                    'hive_service': self.HIVE_SERVICE_NAME,
                    'oozie_service': self.OOZIE_SERVICE_NAME,
                    'sentry_service':
                        self.SENTRY_SERVICE_NAME if snt_count else '',
                    'solr_service':
                        self.SOLR_SERVICE_NAME if slr_count else '',
                    'zookeeper_service':
                        self.ZOOKEEPER_SERVICE_NAME if zk_count else '',
                    'hbase_service':
                        self.HBASE_SERVICE_NAME if hbm_count else '',
                    'impala_service':
                        self.IMPALA_SERVICE_NAME if imp_count else '',
                    'sqoop_service':
                        self.SQOOP_SERVICE_NAME if sqp_count else ''
                },
                'SPARK_ON_YARN': {
                    'yarn_service': self.YARN_SERVICE_NAME
                },
                'HBASE': {
                    'hdfs_service': self.HDFS_SERVICE_NAME,
                    'zookeeper_service': self.ZOOKEEPER_SERVICE_NAME,
                    'hbase_enable_indexing': 'true' if ks_count else 'false',
                    'hbase_enable_replication':
                        'true' if ks_count else 'false'
                },
                'FLUME': {
                    'hdfs_service': self.HDFS_SERVICE_NAME,
                    'solr_service':
                        self.SOLR_SERVICE_NAME if slr_count else '',
                    'hbase_service':
                        self.HBASE_SERVICE_NAME if hbm_count else ''
                },
                'SENTRY': {
                    'hdfs_service': self.HDFS_SERVICE_NAME,
                    'sentry_server_config_safety_valve': (
                        self.c_helper.SENTRY_IMPALA_CLIENT_SAFETY_VALVE
                        if imp_count else '')
                },
                'SOLR': {
                    'hdfs_service': self.HDFS_SERVICE_NAME,
                    'zookeeper_service': self.ZOOKEEPER_SERVICE_NAME
                },
                'SQOOP': {
                    'mapreduce_yarn_service': self.YARN_SERVICE_NAME
                },
                'KS_INDEXER': {
                    'hbase_service': self.HBASE_SERVICE_NAME,
                    'solr_service': self.SOLR_SERVICE_NAME
                },
                'IMPALA': {
                    'hdfs_service': self.HDFS_SERVICE_NAME,
                    'hbase_service':
                        self.HBASE_SERVICE_NAME if hbm_count else '',
                    'hive_service': self.HIVE_SERVICE_NAME,
                    'sentry_service':
                        self.SENTRY_SERVICE_NAME if snt_count else '',
                    'zookeeper_service':
                        self.ZOOKEEPER_SERVICE_NAME if zk_count else ''
                }
            }
            hive_confs = {
                'HIVE': {
                    'hive_metastore_database_type': 'postgresql',
                    'hive_metastore_database_host':
                        self.pu.get_manager(cluster).internal_ip,
                    'hive_metastore_database_port': '7432',
                    'hive_metastore_database_password':
                        dh.get_hive_db_password(cluster)
                }
            }
            hue_confs = {
                'HUE': {
                    'hue_webhdfs': self.pu.get_role_name(
                        self.pu.get_namenode(cluster), 'NAMENODE')
                }
            }
            sentry_confs = {
                'SENTRY': {
                    'sentry_server_database_type': 'postgresql',
                    'sentry_server_database_host':
                        self.pu.get_manager(cluster).internal_ip,
                    'sentry_server_database_port': '7432',
                    'sentry_server_database_password':
                        dh.get_sentry_db_password(cluster)
                }
            }
            kafka_confs = {
                'KAFKA': {
                    'zookeeper_service': self.ZOOKEEPER_SERVICE_NAME
                }
            }
            all_confs = s_cfg.merge_configs(all_confs, hue_confs)
            all_confs = s_cfg.merge_configs(all_confs, hive_confs)
            all_confs = s_cfg.merge_configs(all_confs, sentry_confs)
            all_confs = s_cfg.merge_configs(all_confs, kafka_confs)
            all_confs = s_cfg.merge_configs(all_confs, cluster.cluster_configs)

        if instance:
            snt_count = self.validator.get_inst_count(instance.cluster,
                                                      'SENTRY_SERVER')
            paths = instance.storage_paths()

            instance_default_confs = {
                'NAMENODE': {
                    'dfs_name_dir_list': get_hadoop_dirs(paths, '/fs/nn')
                },
                'SECONDARYNAMENODE': {
                    'fs_checkpoint_dir_list':
                        get_hadoop_dirs(paths, '/fs/snn')
                },
                'DATANODE': {
                    'dfs_data_dir_list': get_hadoop_dirs(paths, '/fs/dn'),
                    'dfs_datanode_data_dir_perm': 755,
                    'dfs_datanode_handler_count': 30
                },
                'NODEMANAGER': {
                    'yarn_nodemanager_local_dirs':
                        get_hadoop_dirs(paths, '/yarn/local'),
                    'container_executor_allowed_system_users':
                        "nobody,impala,hive,llama,hdfs,yarn,mapred,"
                        "spark,oozie",
                    "container_executor_banned_users": "bin"
                },
                'SERVER': {
                    'maxSessionTimeout': 60000
                },
                'HIVESERVER2': {
                    'hiveserver2_enable_impersonation':
                        'false' if snt_count else 'true',
                    'hive_hs2_config_safety_valve': (
                        self.c_helper.HIVE_SERVER2_SENTRY_SAFETY_VALVE
                        if snt_count else '')
                },
                'HIVEMETASTORE': {
                    'hive_metastore_config_safety_valve': (
                        self.c_helper.HIVE_METASTORE_SENTRY_SAFETY_VALVE
                        if snt_count else '')
                }
            }

            ng_user_confs = self.pu.convert_process_configs(
                instance.node_group.node_configs)
            all_confs = s_cfg.merge_configs(all_confs, ng_user_confs)
            all_confs = s_cfg.merge_configs(all_confs, instance_default_confs)

        return all_confs.get(service, {})
Example #40
class MapRFS(s.Service):
    _CREATE_DISK_LIST = 'plugins/mapr/resources/create_disk_list_file.sh'
    _DISK_SETUP_CMD = '/opt/mapr/server/disksetup -F /tmp/disk.list'
    _DISK_SETUP_TIMEOUT = 600

    ENABLE_MAPR_DB_NAME = 'Enable MapR-DB'
    HEAP_SIZE_PERCENT_NAME = 'MapR-FS heap size percent'

    ENABLE_MAPR_DB_CONFIG = p.Config(
        name=ENABLE_MAPR_DB_NAME,
        applicable_target='general',
        scope='cluster',
        config_type="bool",
        priority=1,
        default_value=True,
        description=_('Specifies that MapR-DB is in use.'))

    HEAP_SIZE_PERCENT = p.Config(
        name=HEAP_SIZE_PERCENT_NAME,
        applicable_target='MapRFS',
        scope='cluster',
        config_type="int",
        priority=1,
        default_value=8,
        description=_(
            'Specifies heap size for MapR-FS in percents of maximum value.'))

    def __init__(self):
        super(MapRFS, self).__init__()
        self._ui_name = 'MapRFS'
        self._node_processes = [CLDB, FILE_SERVER, NFS]
        self._ui_info = [
            ('Container Location Database (CLDB)', CLDB, {
                s.SERVICE_UI: 'http://%s:7221'
            }),
        ]
        self._validation_rules = [
            vu.at_least(1, CLDB),
            vu.each_node_has(FILE_SERVER),
            vu.on_same_node(CLDB, FILE_SERVER),
            vu.has_volumes(),
        ]

    def service_dir(self, cluster_context):
        return

    def home_dir(self, cluster_context):
        return

    def conf_dir(self, cluster_context):
        return '%s/conf' % cluster_context.mapr_home

    def post_install(self, cluster_context, instances):
        LOG.debug('Initializing MapR FS')
        instances = instances or cluster_context.get_instances()
        file_servers = cluster_context.filter_instances(instances, FILE_SERVER)
        with context.ThreadGroup() as tg:
            for instance in file_servers:
                tg.spawn('init-mfs-%s' % instance.id, self._init_mfs_instance,
                         instance)
        LOG.info(_LI('MapR FS successfully initialized'))

    def _init_mfs_instance(self, instance):
        self._generate_disk_list_file(instance, self._CREATE_DISK_LIST)
        self._execute_disksetup(instance)

    def _generate_disk_list_file(self, instance, path_to_disk_setup_script):
        LOG.debug('Creating disk list file')
        g.run_script(instance, path_to_disk_setup_script, 'root',
                     *instance.storage_paths())

    def _execute_disksetup(self, instance):
        with instance.remote() as rmt:
            rmt.execute_command(self._DISK_SETUP_CMD,
                                run_as_root=True,
                                timeout=self._DISK_SETUP_TIMEOUT)

    def get_configs(self):
        return [MapRFS.ENABLE_MAPR_DB_CONFIG, MapRFS.HEAP_SIZE_PERCENT]

    def get_config_files(self, cluster_context, configs, instance=None):
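        # Build cldb.conf and warden.conf; if an instance is given, start from
        # the copies already present on that node.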
        default_path = 'plugins/mapr/services/maprfs/resources/cldb.conf'
        cldb_conf = bcf.PropertiesFile("cldb.conf")
        cldb_conf.remote_path = self.conf_dir(cluster_context)
        if instance:
            cldb_conf.fetch(instance)
        cldb_conf.parse(files.get_file_text(default_path))
        cldb_conf.add_properties(self._get_cldb_conf_props(cluster_context))

        warden_conf = bcf.PropertiesFile("warden.conf")
        warden_conf.remote_path = "/opt/mapr/conf/"
        if instance:
            warden_conf.fetch(instance)
        warden_conf.add_properties({
            'service.command.mfs.heapsize.percent':
            configs[self.HEAP_SIZE_PERCENT_NAME]
        })

        return [cldb_conf, warden_conf]

    def _get_cldb_conf_props(self, cluster_context):
        zookeepers = cluster_context.get_zookeeper_nodes_ip_with_port()
        return {'cldb.zookeeper.servers': zookeepers}
Example #41
 def get_description(self):
     return _('The Apache Vanilla plugin provides the ability to launch '
              'an upstream Vanilla Apache Hadoop cluster without any '
              'management consoles. It can also deploy the Oozie '
              'component.')
Example #43
    def run_job(self, job_execution):
        ctx = context.ctx()
        job = conductor.job_get(ctx, job_execution.job_id)
        # This will be a dictionary of tuples, (native_url, runtime_url)
        # keyed by data_source id
        data_source_urls = {}
        additional_sources, updated_job_configs = (
            job_utils.resolve_data_source_references(job_execution.job_configs,
                                                     job_execution.id,
                                                     data_source_urls,
                                                     self.cluster)
        )

        job_execution = conductor.job_execution_update(
            ctx, job_execution,
            {"data_source_urls": job_utils.to_url_dict(data_source_urls)})

        # Now that we've recorded the native urls, we can switch to the
        # runtime urls
        data_source_urls = job_utils.to_url_dict(data_source_urls,
                                                 runtime=True)

        job_utils.prepare_cluster_for_ds(additional_sources,
                                         self.cluster, updated_job_configs,
                                         data_source_urls)

        # It is needed in case we are working with Spark plugin
        self.plugin_params['master'] = (
            self.plugin_params['master'] % {'host': self.master.hostname()})

        # TODO(tmckay): wf_dir should probably be configurable.
        # The only requirement is that the dir is writable by the image user
        wf_dir = job_utils.create_workflow_dir(self.master, '/tmp/spark-edp',
                                               job, job_execution.id, "700")
        paths, builtin_paths = self._upload_job_files(
            self.master, wf_dir, job, updated_job_configs)

        # We can shorten the paths in this case since we'll run out of wf_dir
        paths = [os.path.basename(p) if p.startswith(wf_dir) else p
                 for p in paths]
        builtin_paths = [os.path.basename(p) for p in builtin_paths]

        cmd = self._build_command(wf_dir, paths, builtin_paths,
                                  updated_job_configs)

        job_execution = conductor.job_execution_get(ctx, job_execution.id)
        if job_execution.info['status'] == edp.JOB_STATUS_TOBEKILLED:
            return (None, edp.JOB_STATUS_KILLED, None)

        # If an exception is raised here, the job_manager will mark
        # the job failed and log the exception
        # The redirects of stdout and stderr will preserve output in the wf_dir
        with remote.get_remote(self.master) as r:
            # Upload the command launch script
            launch = os.path.join(wf_dir, "launch_command")
            python_version = r.get_python_version()
            r.write_file_to(launch, self._job_script(python_version))
            r.execute_command("chmod u+rwx,g+rx,o+rx %s" % wf_dir)
            r.execute_command("chmod +x %s" % launch)
            ret, stdout = r.execute_command(
                "cd %s; ./launch_command %s > /dev/null 2>&1 & echo $!"
                % (wf_dir, cmd))

        if ret == 0:
            # Success, we'll add the wf_dir in job_execution.extra and store
            # pid@instance_id as the job id

            # We know the job is running so return "RUNNING"
            return (stdout.strip() + "@" + self.master.id,
                    edp.JOB_STATUS_RUNNING,
                    {'spark-path': wf_dir})

        # Hmm, no exception but something failed.
        # Since we're using backgrounding with redirect, this is unlikely.
        raise e.EDPError(_("Spark job execution failed. Exit status = "
                           "%(status)s, stdout = %(stdout)s") %
                         {'status': ret, 'stdout': stdout})
Example #44
class AbstractPluginUtils(object):

    def __init__(self):
        # c_helper and db_helper will be defined in derived classes.
        self.c_helper = None
        self.db_helper = None

    def get_role_name(self, instance, service):
        # NOTE: role name must match regexp "[_A-Za-z][-_A-Za-z0-9]{0,63}"
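        # e.g. a NAMENODE on host "my-cluster-worker-001" (an illustrative
        # hostname) becomes "NN_my_cluster_worker_001".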
        shortcuts = {
            'ALERTPUBLISHER': 'AP',
            'DATANODE': 'DN',
            'EVENTSERVER': 'ES',
            'HIVEMETASTORE': 'HVM',
            'HIVESERVER2': 'HVS',
            'HOSTMONITOR': 'HM',
            'JOBHISTORY': 'JS',
            'MASTER': 'M',
            'NAMENODE': 'NN',
            'NODEMANAGER': 'NM',
            'OOZIE_SERVER': 'OS',
            'REGIONSERVER': 'RS',
            'RESOURCEMANAGER': 'RM',
            'SECONDARYNAMENODE': 'SNN',
            'SERVER': 'S',
            'SERVICEMONITOR': 'SM',
            'SPARK_YARN_HISTORY_SERVER': 'SHS',
            'WEBHCAT': 'WHC'
        }
        return '%s_%s' % (shortcuts.get(service, service),
                          instance.hostname().replace('-', '_'))

    def get_manager(self, cluster):
        return u.get_instance(cluster, 'CLOUDERA_MANAGER')

    def get_namenode(self, cluster):
        return u.get_instance(cluster, "HDFS_NAMENODE")

    def get_datanodes(self, cluster):
        return u.get_instances(cluster, 'HDFS_DATANODE')

    def get_secondarynamenode(self, cluster):
        return u.get_instance(cluster, 'HDFS_SECONDARYNAMENODE')

    def get_historyserver(self, cluster):
        return u.get_instance(cluster, 'YARN_JOBHISTORY')

    def get_resourcemanager(self, cluster):
        return u.get_instance(cluster, 'YARN_RESOURCEMANAGER')

    def get_nodemanagers(self, cluster):
        return u.get_instances(cluster, 'YARN_NODEMANAGER')

    def get_oozie(self, cluster):
        return u.get_instance(cluster, 'OOZIE_SERVER')

    def get_hive_metastore(self, cluster):
        return u.get_instance(cluster, 'HIVE_METASTORE')

    def get_hive_servers(self, cluster):
        return u.get_instances(cluster, 'HIVE_SERVER2')

    def get_hue(self, cluster):
        return u.get_instance(cluster, 'HUE_SERVER')

    def get_spark_historyserver(self, cluster):
        return u.get_instance(cluster, 'SPARK_YARN_HISTORY_SERVER')

    def get_zookeepers(self, cluster):
        return u.get_instances(cluster, 'ZOOKEEPER_SERVER')

    def get_hbase_master(self, cluster):
        return u.get_instance(cluster, 'HBASE_MASTER')

    def convert_process_configs(self, configs):
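        # Rename the user-facing config sections (e.g. 'OOZIE', 'HUE') to the
        # process names used internally when building service configs.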
        p_dict = {
            "CLOUDERA": ['MANAGER'],
            "NAMENODE": ['NAMENODE'],
            "DATANODE": ['DATANODE'],
            "SECONDARYNAMENODE": ['SECONDARYNAMENODE'],
            "RESOURCEMANAGER": ['RESOURCEMANAGER'],
            "NODEMANAGER": ['NODEMANAGER'],
            "JOBHISTORY": ['JOBHISTORY'],
            "OOZIE": ['OOZIE_SERVER'],
            "HIVESERVER": ['HIVESERVER2'],
            "HIVEMETASTORE": ['HIVEMETASTORE'],
            "WEBHCAT": ['WEBHCAT'],
            "HUE": ['HUE_SERVER'],
            "SPARK_ON_YARN": ['SPARK_YARN_HISTORY_SERVER'],
            "ZOOKEEPER": ['SERVER'],
            "MASTER": ['MASTER'],
            "REGIONSERVER": ['REGIONSERVER'],
            'YARN_GATEWAY': ['YARN_GATEWAY'],
            'HDFS_GATEWAY': ['HDFS_GATEWAY']
        }
        if isinstance(configs, res.Resource):
            configs = configs.to_dict()
        for k in list(configs):
            if k in p_dict:
                item = configs[k]
                del configs[k]
                newkey = p_dict[k][0]
                configs[newkey] = item
        return res.Resource(configs)

    def convert_role_showname(self, showname):
        # Yarn ResourceManager and Standby ResourceManager will
        # be converted to ResourceManager.
        name_dict = {
            'CLOUDERA_MANAGER': 'MANAGER',
            'HDFS_NAMENODE': 'NAMENODE',
            'HDFS_DATANODE': 'DATANODE',
            'HDFS_JOURNALNODE': 'JOURNALNODE',
            'HDFS_SECONDARYNAMENODE': 'SECONDARYNAMENODE',
            'YARN_RESOURCEMANAGER': 'RESOURCEMANAGER',
            'YARN_STANDBYRM': 'RESOURCEMANAGER',
            'YARN_NODEMANAGER': 'NODEMANAGER',
            'YARN_JOBHISTORY': 'JOBHISTORY',
            'OOZIE_SERVER': 'OOZIE_SERVER',
            'HIVE_SERVER2': 'HIVESERVER2',
            'HIVE_METASTORE': 'HIVEMETASTORE',
            'HIVE_WEBHCAT': 'WEBHCAT',
            'HUE_SERVER': 'HUE_SERVER',
            'SPARK_YARN_HISTORY_SERVER': 'SPARK_YARN_HISTORY_SERVER',
            'ZOOKEEPER_SERVER': 'SERVER',
            'HBASE_MASTER': 'MASTER',
            'HBASE_REGIONSERVER': 'REGIONSERVER',
            'FLUME_AGENT': 'AGENT',
            'IMPALA_CATALOGSERVER': 'CATALOGSERVER',
            'IMPALA_STATESTORE': 'STATESTORE',
            'IMPALAD': 'IMPALAD',
            'KEY_VALUE_STORE_INDEXER': 'HBASE_INDEXER',
            'SENTRY_SERVER': 'SENTRY_SERVER',
            'SOLR_SERVER': 'SOLR_SERVER',
            'SQOOP_SERVER': 'SQOOP_SERVER',
        }
        return name_dict.get(showname, showname)

    def install_packages(self, instances, packages):
        # instances non-empty
        cpo.add_provisioning_step(
            instances[0].cluster_id, _("Install packages"), len(instances))

        with context.ThreadGroup() as tg:
            for i in instances:
                tg.spawn('cdh-inst-pkgs-%s' % i.instance_name,
                         self._install_pkgs, i, packages)

    @cpo.event_wrapper(True)
    def _install_pkgs(self, instance, packages):
        with instance.remote() as r:
            cmd.install_packages(r, packages)

    def start_cloudera_agents(self, instances):
        # instances non-empty
        cpo.add_provisioning_step(
            instances[0].cluster_id, _("Start Cloudera Agents"),
            len(instances))

        with context.ThreadGroup() as tg:
            for i in instances:
                tg.spawn('cdh-agent-start-%s' % i.instance_name,
                         self._start_cloudera_agent, i)

    @cpo.event_wrapper(True)
    def _start_cloudera_agent(self, instance):
        mng_hostname = self.get_manager(instance.cluster).hostname()
        with instance.remote() as r:
            cmd.configure_agent(r, mng_hostname)
            cmd.start_agent(r)

    def configure_swift(self, cluster, instances=None):
        if self.c_helper.is_swift_enabled(cluster):
            if not instances:
                instances = u.get_instances(cluster)
            cpo.add_provisioning_step(
                cluster.id, _("Configure Swift"), len(instances))

            with context.ThreadGroup() as tg:
                for i in instances:
                    tg.spawn('cdh-swift-conf-%s' % i.instance_name,
                             self._configure_swift_to_inst, i)
            swift_helper.install_ssl_certs(instances)

    @cpo.event_wrapper(True)
    def _configure_swift_to_inst(self, instance):
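        # Download the hadoop-openstack (Swift) jar onto the instance only if
        # it is not already present in the Hadoop lib directory.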
        cluster = instance.cluster
        swift_lib_remote_url = self.c_helper.get_swift_lib_url(cluster)
        with instance.remote() as r:
            if r.execute_command('ls %s/hadoop-openstack.jar' % HADOOP_LIB_DIR,
                                 raise_when_error=False)[0] != 0:
                r.execute_command('sudo curl %s -o %s/hadoop-openstack.jar' % (
                    swift_lib_remote_url, HADOOP_LIB_DIR))

    def put_hive_hdfs_xml(self, cluster):
        servers = self.get_hive_servers(cluster)
        with servers[0].remote() as r:
            conf_path = edp_u.get_hive_shared_conf_path('hdfs')
            r.execute_command(
                'sudo su - -c "hadoop fs -mkdir -p %s" hdfs'
                % os.path.dirname(conf_path))
            r.execute_command(
                'sudo su - -c "hadoop fs -put /etc/hive/conf/hive-site.xml '
                '%s" hdfs' % conf_path)

    def configure_hive(self, cluster):
        manager = self.get_manager(cluster)
        with manager.remote() as r:
            self.db_helper.create_hive_database(cluster, r)

    def create_hive_hive_directory(self, cluster):
        # Hive requires /tmp/hive-hive directory
        namenode = self.get_namenode(cluster)
        with namenode.remote() as r:
            r.execute_command(
                'sudo su - -c "hadoop fs -mkdir -p /tmp/hive-hive" hdfs')
            r.execute_command(
                'sudo su - -c "hadoop fs -chown hive /tmp/hive-hive" hdfs')

    def install_extjs(self, cluster):
        extjs_remote_location = self.c_helper.get_extjs_lib_url(cluster)
        extjs_vm_location_dir = '/var/lib/oozie'
        extjs_vm_location_path = extjs_vm_location_dir + '/extjs.zip'
        with self.get_oozie(cluster).remote() as r:
            if r.execute_command('ls %s/ext-2.2' % extjs_vm_location_dir,
                                 raise_when_error=False)[0] != 0:
                r.execute_command('curl -L -o \'%s\' %s' % (
                    extjs_vm_location_path, extjs_remote_location),
                    run_as_root=True)
                r.execute_command('unzip %s -d %s' % (
                    extjs_vm_location_path, extjs_vm_location_dir),
                    run_as_root=True)

    def _check_cloudera_manager_started(self, manager):
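        # Cloudera Manager is considered started once its API port accepts a
        # TCP connection.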
        try:
            conn = telnetlib.Telnet(manager.management_ip, CM_API_PORT)
            conn.close()
            return True
        except IOError:
            return False

    @cpo.event_wrapper(
        True, step=_("Start Cloudera Manager"), param=('cluster', 1))
    def _start_cloudera_manager(self, cluster, timeout_config):
        manager = self.get_manager(cluster)
        with manager.remote() as r:
            cmd.start_cloudera_db(r)
            cmd.start_manager(r)
        poll_utils.plugin_option_poll(
            cluster, self._check_cloudera_manager_started, timeout_config,
            _("Await starting Cloudera Manager"), 2, {'manager': manager})

    def configure_os(self, instances):
        # instances non-empty
        cpo.add_provisioning_step(
            instances[0].cluster_id, _("Configure OS"), len(instances))
        with context.ThreadGroup() as tg:
            for inst in instances:
                tg.spawn('cdh-repo-conf-%s' % inst.instance_name,
                         self._configure_repo_from_inst, inst)

    @cpo.event_wrapper(True)
    def _configure_repo_from_inst(self, instance):
        LOG.debug("Configure repos from instance {instance}".format(
                  instance=instance.instance_name))
        cluster = instance.cluster

        cdh5_key = self.c_helper.get_cdh5_key_url(cluster)
        cm5_key = self.c_helper.get_cm5_key_url(cluster)

        with instance.remote() as r:
            if cmd.is_ubuntu_os(r):
                cdh5_key = (cdh5_key or
                            self.c_helper.DEFAULT_CDH5_UBUNTU_REPO_KEY_URL)
                cm5_key = (cm5_key or
                           self.c_helper.DEFAULT_CM5_UBUNTU_REPO_KEY_URL)

                cdh5_repo_content = self.c_helper.CDH5_UBUNTU_REPO
                cm5_repo_content = self.c_helper.CM5_UBUNTU_REPO

                cmd.write_ubuntu_repository(r, cdh5_repo_content, 'cdh')
                cmd.add_apt_key(r, cdh5_key)
                cmd.write_ubuntu_repository(r, cm5_repo_content, 'cm')
                cmd.add_apt_key(r, cm5_key)
                cmd.update_repository(r)

            if cmd.is_centos_os(r):
                cdh5_repo_content = self.c_helper.CDH5_CENTOS_REPO
                cm5_repo_content = self.c_helper.CM5_CENTOS_REPO

                cmd.write_centos_repository(r, cdh5_repo_content, 'cdh')
                cmd.write_centos_repository(r, cm5_repo_content, 'cm')
                cmd.update_repository(r)

    def _get_config_value(self, service, name, configs, cluster=None):
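        # Prefer a value from the user-supplied cluster configs; otherwise
        # fall back to the plugin's declared default for this config.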
        if cluster:
            conf = cluster.cluster_configs
            if service in conf and name in conf[service]:
                return types.transform_to_num(conf[service][name])
        for config in configs:
            if config.applicable_target == service and config.name == name:
                return types.transform_to_num(config.default_value)
        raise exc.InvalidDataException(
            _("Unable to find config: {applicable_target: %(target)s, name: "
              "%(name)s").format(target=service, name=name))

    def recommend_configs(self, cluster, plugin_configs, scaling):
        provider = CDHPluginAutoConfigsProvider(
            AUTO_CONFIGURATION_SCHEMA, plugin_configs, cluster, scaling)
        provider.apply_recommended_configs()
Example #45
def check_plugin_name_exists(name):
    if name not in [p.name for p in api.get_plugins()]:
        raise ex.InvalidReferenceException(
            _("Sahara doesn't contain plugin with name '%s'") % name)
Example #46
 def run_scheduled_job(self, job_execution):
     raise e.NotImplementedException(_("Currently the Spark engine does not"
                                       " support scheduled EDP jobs"))
Example #47
def check_node_group_template_unique_name(name):
    if name in [t.name for t in api.get_node_group_templates()]:
        raise ex.NameAlreadyExistsException(
            _("NodeGroup template with name '%s' already exists") % name)
Example #48
def check_image_registered(image_id):
    if image_id not in [i.id for i in nova.client().images.list_registered()]:
        raise ex.InvalidReferenceException(
            _("Requested image '%s' is not registered") % image_id)
Example #49
def check_cluster_template_unique_name(name):
    if name in [t.name for t in api.get_cluster_templates()]:
        raise ex.NameAlreadyExistsException(
            _("Cluster template with name '%s' already exists") % name)
Example #50
def check_node_group_template_exists(ng_tmpl_id):
    if not api.get_node_group_template(id=ng_tmpl_id):
        raise ex.NotFoundException(
            ng_tmpl_id, _("NodeGroup template with id '%s' not found"))
Example #51
def check_keypair_exists(keypair):
    try:
        nova.client().keypairs.get(keypair)
    except nova_ex.NotFound:
        raise ex.NotFoundException(keypair,
                                   _("Requested keypair '%s' not found"))
Example #52
def check_cluster_template_exists(cluster_template_id):
    if not api.get_cluster_template(id=cluster_template_id):
        raise ex.NotFoundException(
            cluster_template_id, _("Cluster template with id '%s' not found"))
Example #53
def check_volume_type_exists(volume_type):
    volume_types = cinder.client().volume_types.list(
        search_opts={'name': volume_type})
    if len(volume_types) == 1 and volume_types[0].name == volume_type:
        return
    raise ex.NotFoundException(volume_type, _("Volume type '%s' not found"))
Example #54
def check_network_exists(net_id):
    if not nova.get_network(id=net_id):
        raise ex.NotFoundException(net_id, _("Network %s not found"))
Example #55
def check_availability_zone_exist(az):
    az_list = nova.client().availability_zones.list(False)
    az_names = [a.zoneName for a in az_list]
    if az not in az_names:
        raise ex.NotFoundException(az,
                                   _("Nova availability zone '%s' not found"))
Example #56
def check_cluster_unique_name(name):
    if name in [cluster.name for cluster in api.get_clusters()]:
        raise ex.NameAlreadyExistsException(
            _("Cluster with name '%s' already exists") % name)
    check_heat_stack_name(name)
Example #57
def check_flavor_exists(flavor_id):
    flavor_list = nova.client().flavors.list()
    if flavor_id not in [flavor.id for flavor in flavor_list]:
        raise ex.NotFoundException(flavor_id,
                                   _("Requested flavor '%s' not found"))
Example #58
def check_volume_availability_zone_exist(az):
    az_list = cinder.client().availability_zones.list()
    az_names = [a.zoneName for a in az_list]
    if az not in az_names:
        raise ex.NotFoundException(
            az, _("Cinder availability zone '%s' not found"))
Example #59
                                                  '/hdfs/datanode')
    dirs['hadoop_log_dir'] = _make_hadoop_paths(storage_paths,
                                                '/hadoop/logs')[0]
    dirs['hadoop_secure_dn_log_dir'] = _make_hadoop_paths(
        storage_paths, '/hadoop/logs/secure')[0]
    dirs['yarn_log_dir'] = _make_hadoop_paths(storage_paths, '/yarn/logs')[0]

    return dirs


def _make_hadoop_paths(paths, hadoop_dir):
    return [path + hadoop_dir for path in paths]


@cpo.event_wrapper(True,
                   step=_("Configure topology data"),
                   param=('cluster', 1))
def configure_topology_data(pctx, cluster):
    if c_helper.is_data_locality_enabled(pctx, cluster):
        LOG.warning(
            _LW("Node group awareness is not implemented in YARN yet "
                "so enable_hypervisor_awareness set to False "
                "explicitly"))
        tpl_map = th.generate_topology_map(cluster, is_node_awareness=False)
        topology_data = "\n".join([k + " " + v
                                   for k, v in tpl_map.items()]) + "\n"
        for ng in cluster.node_groups:
            for i in ng.instances:
                i.remote().write_file_to(HADOOP_CONF_DIR + "/topology.data",
                                         topology_data,
                                         run_as_root=True)
Example #60
def check_duplicates_node_groups_names(node_groups):
    ng_names = [ng['name'] for ng in node_groups]
    if len(set(ng_names)) < len(node_groups):
        raise ex.InvalidDataException(
            _("Duplicates in node group names are detected"))