def _install_services(self, cluster_name, ambari_info):

        ambari_address = ambari_info.get_address()
        install_url = ('http://{0}/api/v1/clusters/{'
                       '1}/services?ServiceInfo/state=INIT'.format(
                           ambari_address, cluster_name))
        body = ('{"RequestInfo" : { "context" : "Install all services" },'
                '"Body" : {"ServiceInfo": {"state" : "INSTALLED"}}}')

        result = self._put(install_url, ambari_info, data=body)

        if result.status_code == 202:
            json_result = json.loads(result.text)
            request_id = json_result['Requests']['id']
            success = self._wait_for_async_request(
                self._get_async_request_uri(ambari_info, cluster_name,
                                            request_id), ambari_info)
            if success:
                LOG.info(_LI("Hadoop stack installed successfully."))
                self._finalize_ambari_state(ambari_info)
            else:
                LOG.error(_LE('Install command failed.'))
                raise ex.HadoopProvisionError(
                    _('Installation of Hadoop stack failed.'))
        elif result.status_code != 200:
            LOG.error(
                _LE('Install command failed. {result}').format(
                    result=result.text))
            raise ex.HadoopProvisionError(
                _('Installation of Hadoop stack failed.'))
    def _install_services(self, cluster_name, ambari_info):

        ambari_address = ambari_info.get_address()
        install_url = ('http://{0}/api/v1/clusters/{'
                       '1}/services?ServiceInfo/state=INIT'.format(
                           ambari_address, cluster_name))
        body = ('{"RequestInfo" : { "context" : "Install all services" },'
                '"Body" : {"ServiceInfo": {"state" : "INSTALLED"}}}')

        result = self._put(install_url, ambari_info, data=body)

        if result.status_code == 202:
            json_result = json.loads(result.text)
            request_id = json_result['Requests']['id']
            success = self._wait_for_async_request(self._get_async_request_uri(
                ambari_info, cluster_name, request_id),
                ambari_info)
            if success:
                LOG.info(_LI("Hadoop stack installed successfully."))
                self._finalize_ambari_state(ambari_info)
            else:
                LOG.error(_LE('Install command failed.'))
                raise ex.HadoopProvisionError(
                    _('Installation of Hadoop stack failed.'))
        elif result.status_code != 200:
            LOG.error(
                _LE('Install command failed. {result}').format(
                    result=result.text))
            raise ex.HadoopProvisionError(
                _('Installation of Hadoop stack failed.'))
Exemple #3
0
    def start_services(self, cluster_name, cluster_spec, ambari_info):
        """Start all INSTALLED services of the cluster through Ambari.

        Service start notifications are fired before the PUT is issued. A
        202 reply is polled until the asynchronous start finishes; a 200
        reply needs no further action.

        :param cluster_name: cluster name used to build the Ambari URL
        :param cluster_spec: passed to the start-notification helper
        :param ambari_info: provides ``get_address()`` and is forwarded to
                            the HTTP and polling helpers
        :raises HadoopProvisionError: if the request is rejected or the
                                      asynchronous start fails
        """
        url_template = ('http://{0}/api/v1/clusters/{1}/services'
                        '?ServiceInfo/state=INSTALLED')
        start_url = url_template.format(ambari_info.get_address(),
                                        cluster_name)
        payload = ('{"RequestInfo" : { "context" : "Start all services" },'
                   '"Body" : {"ServiceInfo": {"state" : "STARTED"}}}')

        self._fire_service_start_notifications(cluster_name, cluster_spec,
                                               ambari_info)
        response = self._put(start_url, ambari_info, data=payload)
        status = response.status_code

        if status == 200:
            return

        if status != 202:
            LOG.error(
                _LE('Start command failed. Status: {status}, '
                    'response: {response}').format(status=status,
                                                   response=response.text))
            raise ex.HadoopProvisionError(
                _('Start of Hadoop services failed.'))

        # 202: poll the asynchronous start request until it completes.
        request_id = json.loads(response.text)['Requests']['id']
        request_uri = self._get_async_request_uri(ambari_info, cluster_name,
                                                  request_id)
        if not self._wait_for_async_request(request_uri, ambari_info):
            LOG.error(_LE('Failed to start Hadoop cluster.'))
            raise ex.HadoopProvisionError(
                _('Start of Hadoop services failed.'))

        LOG.info(_LI("Successfully started Hadoop cluster."))
        LOG.info(
            _LI('Ambari server address: {server_address}').format(
                server_address=ambari_info.get_address()))
    def start_services(self, cluster_name, cluster_spec, ambari_info):
        """Ask Ambari to move every INSTALLED service to STARTED.

        Fires the service start notifications, issues the PUT and, when
        Ambari answers 202, waits for the asynchronous request to finish.
        A 200 answer is accepted without further work.

        :param cluster_name: cluster name used to build the Ambari URL
        :param cluster_spec: passed to the start-notification helper
        :param ambari_info: provides ``get_address()`` and is forwarded to
                            the HTTP and polling helpers
        :raises HadoopProvisionError: on any status other than 200/202, or
                                      when the asynchronous start fails
        """
        start_url = (
            'http://{0}/api/v1/clusters/{1}/services?ServiceInfo/'
            'state=INSTALLED').format(ambari_info.get_address(),
                                      cluster_name)
        request_body = (
            '{"RequestInfo" : { "context" : "Start all services" },'
            '"Body" : {"ServiceInfo": {"state" : "STARTED"}}}')

        self._fire_service_start_notifications(
            cluster_name, cluster_spec, ambari_info)
        reply = self._put(start_url, ambari_info, data=request_body)

        if reply.status_code not in (200, 202):
            LOG.error(
                _LE('Start command failed. Status: {status}, '
                    'response: {response}').format(status=reply.status_code,
                                                   response=reply.text))
            raise ex.HadoopProvisionError(
                _('Start of Hadoop services failed.'))

        if reply.status_code == 200:
            return

        # Accepted (202): follow the asynchronous request until it ends.
        request_id = json.loads(reply.text)['Requests']['id']
        status_uri = self._get_async_request_uri(ambari_info, cluster_name,
                                                 request_id)
        started = self._wait_for_async_request(status_uri, ambari_info)
        if not started:
            LOG.error(_LE('Failed to start Hadoop cluster.'))
            raise ex.HadoopProvisionError(
                _('Start of Hadoop services failed.'))

        LOG.info(_LI("Successfully started Hadoop cluster."))
        LOG.info(_LI('Ambari server address: {server_address}')
                 .format(server_address=ambari_info.get_address()))
Exemple #5
0
def _provision_cluster(cluster_id):
    """Take a cluster through infra update, creation, configuration and start.

    After each long-running step the cluster's existence is re-checked; if
    it no longer exists a message is logged and provisioning stops. A
    failure while configuring or starting is logged and the cluster status
    is set to "Error". On success the status becomes "Active" and every job
    execution returned for the cluster is run.

    :param cluster_id: id of the cluster to provision
    """
    def _cluster_is_gone(current):
        # Log the deletion message and report True when the cluster no
        # longer exists.
        if g.check_cluster_exists(current):
            return False
        LOG.info(g.format_cluster_deleted_message(current))
        return True

    ctx, cluster, plugin = _prepare_provisioning(cluster_id)

    if CONF.use_identity_api_v3 and cluster.is_transient:
        trusts.create_trust_for_cluster(cluster)

    # updating cluster infra
    cluster = g.change_cluster_status(cluster, "InfraUpdating")
    plugin.update_infra(cluster)

    # creating instances and configuring them
    cluster = conductor.cluster_get(ctx, cluster_id)
    INFRA.create_cluster(cluster)

    if _cluster_is_gone(cluster):
        return

    # configure cluster
    cluster = g.change_cluster_status(cluster, "Configuring")
    try:
        plugin.configure_cluster(cluster)
    except Exception as err:
        if not _cluster_is_gone(cluster):
            LOG.exception(
                _LE("Can't configure cluster '%(name)s' "
                    "(reason: %(reason)s)"),
                {'name': cluster.name, 'reason': err})
            g.change_cluster_status(cluster, "Error")
        return

    if _cluster_is_gone(cluster):
        return

    # starting prepared and configured cluster
    cluster = g.change_cluster_status(cluster, "Starting")
    try:
        plugin.start_cluster(cluster)
    except Exception as err:
        if not _cluster_is_gone(cluster):
            LOG.exception(
                _LE("Can't start services for cluster '%(name)s' (reason: "
                    "%(reason)s)"), {'name': cluster.name, 'reason': err})
            g.change_cluster_status(cluster, "Error")
        return

    if _cluster_is_gone(cluster):
        return

    # cluster is now up and ready
    cluster = g.change_cluster_status(cluster, "Active")

    # schedule execution pending job for cluster
    pending = conductor.job_execution_get_all(ctx, cluster_id=cluster.id)
    for job_execution in pending:
        job_manager.run_job(job_execution.id)
Exemple #6
0
    def _add_hosts_and_components(
            self, cluster_spec, servers, ambari_info, name):

        add_host_url = 'http://{0}/api/v1/clusters/{1}/hosts/{2}'
        add_host_component_url = ('http://{0}/api/v1/clusters/{1}'
                                  '/hosts/{2}/host_components/{3}')
        for host in servers:
            hostname = host.instance.fqdn().lower()
            result = self._post(
                add_host_url.format(ambari_info.get_address(), name, hostname),
                ambari_info)
            if result.status_code != 201:
                LOG.error(
                    _LE('Create host command failed. {0}').format(result.text))
                raise ex.HadoopProvisionError(
                    _('Failed to add host: %s') % result.text)

            node_group_name = host.node_group.name
            # TODO(jspeidel): ensure that node group exists
            node_group = cluster_spec.node_groups[node_group_name]
            for component in node_group.components:
                # don't add any AMBARI components
                if component.find('AMBARI') != 0:
                    result = self._post(add_host_component_url.format(
                        ambari_info.get_address(), name, hostname, component),
                        ambari_info)
                    if result.status_code != 201:
                        LOG.error(
                            _LE('Create host_component command failed. %s'),
                            result.text)
                        raise ex.HadoopProvisionError(
                            _('Failed to add host component: %s')
                            % result.text)
Exemple #7
0
        def wrapper(cluster_id, *args, **kwds):
            # Runs the wrapped operation ``f`` for the cluster. On failure a
            # rollback is attempted and the outcome is recorded in the
            # cluster status. If the cluster has been deleted (or is being
            # deleted) in the meantime, the operation is abandoned after a
            # debug log entry.
            def is_gone(found):
                # The cluster may have been removed while we were working.
                return (found is None
                        or found.status == c_u.CLUSTER_STATUS_DELETING)

            ctx = context.ctx()
            try:
                # Clearing status description before executing
                c_u.change_cluster_status_description(cluster_id, "")
                f(cluster_id, *args, **kwds)
            except Exception as op_error:
                # something happened during cluster operation
                cluster = conductor.cluster_get(ctx, cluster_id)
                if is_gone(cluster):
                    LOG.debug("Cluster was deleted or marked for deletion. "
                              "Canceling current operation.")
                    return

                msg = six.text_type(op_error)
                LOG.exception(
                    _LE("Error during operating on cluster (reason: "
                        "{reason})").format(reason=msg))

                try:
                    # trying to rollback
                    desc = description.format(reason=msg)
                    if _rollback_cluster(cluster, op_error):
                        outcome = c_u.CLUSTER_STATUS_ACTIVE
                    else:
                        outcome = c_u.CLUSTER_STATUS_ERROR
                    c_u.change_cluster_status(cluster, outcome, desc)
                except Exception as rollback_error:
                    # the cluster might have been removed during rollback
                    cluster = conductor.cluster_get(ctx, cluster_id)
                    if is_gone(cluster):
                        LOG.debug("Cluster was deleted or marked for deletion."
                                  " Canceling current operation.")
                        return

                    rollback_msg = six.text_type(rollback_error)
                    LOG.exception(
                        _LE("Error during rollback of cluster (reason:"
                            " {reason})").format(reason=rollback_msg))
                    desc = "{0}, {1}".format(msg, rollback_msg)
                    c_u.change_cluster_status(cluster,
                                              c_u.CLUSTER_STATUS_ERROR,
                                              description.format(reason=desc))
Exemple #8
0
def _delete_job_execution(job_execution_id):
    """Cancel a job execution, then destroy its record.

    If cancelling raises ``CancelingFailed`` the error is logged and the
    record is destroyed anyway.

    :param job_execution_id: id of the job execution to remove
    """
    try:
        _cancel_job_execution(job_execution_id)
    except exceptions.CancelingFailed:
        failure = _LE("Job execution %s can't be cancelled in time. "
                      "Deleting it anyway.")
        LOG.error(failure, job_execution_id)

    conductor.job_execution_destroy(context.ctx(), job_execution_id)
    def _migrate_up(self, engine, version, with_data=False):
        """migrate up to a new version of the db.

        We allow for data insertion and post checks at every
        migration version with special _pre_upgrade_### and
        _check_### functions in the main test.
        """
        # NOTE(sdague): try block is here because it's impossible to debug
        # where a failed data migration happens otherwise
        check_version = version
        try:
            if with_data:
                data = None
                pre_upgrade = getattr(self, "_pre_upgrade_%s" % check_version,
                                      None)
                if pre_upgrade:
                    data = pre_upgrade(engine)
            self._migrate(engine, version, 'upgrade')
            self.assertEqual(version, self._get_version_from_db(engine))
            if with_data:
                check = getattr(self, "_check_%s" % check_version, None)
                if check:
                    check(engine, data)
        except Exception:
            LOG.error(
                _LE("Failed to migrate to version {version} on engine "
                    "{engine}").format(version=version, engine=engine))
            raise
Exemple #10
0
def create_trust(trustor, trustee, role_names, impersonation=True,
                 project_id=None, expires=True):
    """Create a trust and return its identifier

    :param trustor: The Keystone client delegating the trust.
    :param trustee: The Keystone client consuming the trust.
    :param role_names: A list of role names to be assigned.
    :param impersonation: Should the trustee impersonate trustor,
                          default is True.
    :param project_id: The project that the trust will be scoped into,
                       default is the trustor's project id.
    :param expires: The trust will expire if this is set to True.
    :returns: A valid trust id.
    :raises CreationFailed: If the trust cannot be created.

    """
    scoped_project = trustor.tenant_id if project_id is None else project_id
    try:
        expiry = None
        if expires:
            expiry = _get_expiry()
        trust = trustor.trusts.create(trustor_user=trustor.user_id,
                                      trustee_user=trustee.user_id,
                                      impersonation=impersonation,
                                      role_names=role_names,
                                      project=scoped_project,
                                      expires_at=expiry)
        created_id = six.text_type(trust.id)
        LOG.debug("Created trust {trust_id}".format(trust_id=created_id))
        return trust.id
    except Exception as e:
        # Any failure is reported to the caller as CreationFailed.
        LOG.error(
            _LE("Unable to create trust (reason: {reason})").format(
                reason=e))
        raise ex.CreationFailed(_("Failed to create trust"))
Exemple #11
0
def _delete_volume(volume_id):
    """Delete the given volume, logging instead of raising on failure.

    The delete call goes through ``b.execute_with_retries``.

    :param volume_id: id of the volume to look up and delete
    """
    announcement = "Deleting volume {volume}".format(volume=volume_id)
    LOG.debug(announcement)
    target = cinder.get_volume(volume_id)
    try:
        b.execute_with_retries(target.delete)
    except Exception:
        failure = _LE("Can't delete volume {volume}").format(
            volume=target.id)
        LOG.error(failure)
Exemple #12
0
def _delete_volume(volume_id):
    """Delete the given volume; a failure is logged with its traceback.

    :param volume_id: id of the volume to look up and delete
    """
    announcement = "Deleting volume %s" % volume_id
    LOG.debug(announcement)
    target = cinder.get_volume(volume_id)
    try:
        target.delete()
    except Exception:
        # Best effort: report the problem but do not propagate it.
        failure = _LE("Can't delete volume %s")
        LOG.exception(failure, target.id)
Exemple #13
0
def _detach_volume(instance, volume_id):
    """Detach a volume from an instance and wait for it to be released.

    The detach request is best effort: a failure is logged and the function
    still polls the volume until its status is 'available' or 'error', or
    until ``CONF.detach_volume_timeout`` seconds have elapsed. On timeout a
    warning with the last seen status is logged.

    :param instance: object exposing ``instance_name`` and ``instance_id``
    :param volume_id: id of the volume to detach
    """
    volume = cinder.get_volume(volume_id)
    try:
        LOG.debug("Detaching volume %s from instance %s",
                  volume_id, instance.instance_name)
        nova.client().volumes.delete_server_volume(instance.instance_id,
                                                   volume_id)
    except Exception:
        LOG.exception(_LE("Can't detach volume %s"), volume.id)

    detach_timeout = CONF.detach_volume_timeout
    LOG.debug("Waiting %d seconds to detach %s volume",
              detach_timeout, volume_id)
    s_time = tu.utcnow()
    while tu.delta_seconds(s_time, tu.utcnow()) < detach_timeout:
        volume = cinder.get_volume(volume_id)
        if volume.status in ('available', 'error'):
            LOG.debug("Volume %s has been detached", volume_id)
            return
        context.sleep(2)

    # Only reached on timeout: the loop leaves via ``return`` on success.
    # ``warning`` replaces the deprecated ``warn`` alias.
    LOG.warning(_LW("Can't detach volume %(volume)s. "
                    "Current status of volume: %(status)s"),
                {'volume': volume_id, 'status': volume.status})
Exemple #14
0
def execute_job(job_id, data):
    """Create a job execution record and submit it via ``OPS.run_edp_job``.

    If the execution requires a proxy user and creating one fails with a
    ``SaharaException``, the new record is destroyed and the exception is
    re-raised.

    :param job_id: id of the job to execute
    :param data: mapping with a mandatory 'cluster_id' and optional
                 'job_configs', 'input_id' and 'output_id'
    :returns: the created job execution object
    """
    # Elements common to all job types
    cluster_id = data['cluster_id']
    configs = data.get('job_configs', {})

    # Since we will use a unified class in the database, we pass
    # a superset for all job types. input_id/output_id are not in Java
    # job types but present for all others.
    job_ex_dict = {
        'input_id': data.get('input_id'),
        'output_id': data.get('output_id'),
        'job_id': job_id,
        'cluster_id': cluster_id,
        'info': {'status': edp.JOB_STATUS_PENDING},
        'job_configs': configs,
        'extra': {},
    }
    job_execution = conductor.job_execution_create(context.ctx(), job_ex_dict)

    # check to use proxy user
    needs_proxy = p.job_execution_requires_proxy_user(job_execution)
    if needs_proxy:
        try:
            p.create_proxy_user_for_job_execution(job_execution)
        except ex.SaharaException as e:
            failure = _LE("Can't run job execution '{0}' "
                          "(reasons: {1})").format(job_execution.id, e)
            LOG.exception(failure)
            conductor.job_execution_destroy(context.ctx(), job_execution)
            raise e

    OPS.run_edp_job(job_execution.id)

    return job_execution
Exemple #15
0
def _delete_volume(volume_id):
    """Delete the given volume, logging an error if the delete fails.

    :param volume_id: id of the volume to look up and delete
    """
    LOG.debug("Deleting volume {volume}".format(volume=volume_id))
    doomed = cinder.get_volume(volume_id)
    try:
        doomed.delete()
    except Exception:
        # Best effort: report the problem but do not propagate it.
        failure = _LE("Can't delete volume {volume}").format(
            volume=doomed.id)
        LOG.error(failure)
Exemple #16
0
def update_job_status(job_execution_id):
    """Call ``get_job_status`` for the execution, logging any exception.

    Exceptions are logged with their traceback and not propagated.

    :param job_execution_id: id of the job execution to query
    """
    try:
        get_job_status(job_execution_id)
    except Exception as e:
        failure = _LE("Error during update job execution {job}: {error}")
        LOG.exception(failure.format(job=job_execution_id, error=e))
Exemple #17
0
    def _build_proxy_command(self, command, instance=None, port=None,
                             info=None, rootwrap_command=None):
        """Fill in the proxy command template and optionally prefix it.

        Accepted keywords in the proxy command template:
        {host}, {port}, {tenant_id}, {network_id}, {router_id}

        :param command: template string containing the keywords above
        :param instance: instance whose ``management_ip`` becomes {host}
        :param port: value substituted for {port}
        :param info: neutron info mapping; looked up from ``instance`` when
                     not given
        :param rootwrap_command: if set, prepended to the final command
        :returns: the rendered command string
        :raises SystemError: if the template uses an unknown keyword
        """
        if not info:
            info = self.get_neutron_info(instance)

        keywords = {
            'tenant_id': context.current().tenant_id,
            'network_id': info['network'],
        }

        # Query Neutron only if needed
        if '{router_id}' in command:
            client = neutron.NeutronClient(info['network'], info['token'],
                                           info['tenant'])
            keywords['router_id'] = client.get_router()

        keywords['host'] = instance.management_ip
        keywords['port'] = port

        try:
            rendered = command.format(**keywords)
        except KeyError as e:
            LOG.error(_LE('Invalid keyword in proxy_command: {result}').format(
                result=e))
            # Do not give more details to the end-user
            raise ex.SystemError('Misconfiguration')

        if not rootwrap_command:
            return rendered
        return '{0} {1}'.format(rootwrap_command, rendered)
Exemple #18
0
    def _hdfs_ha_update_host_component(self, hac, host, component, state):

        update_host_component_url = ('http://{0}/api/v1/clusters/{1}'
                                     '/hosts/{2}/host_components/{3}').format(
                                         hac['ambari_info'].get_address(),
                                         hac['name'], host, component)
        component_state = {"HostRoles": {"state": state}}
        body = json.dumps(component_state)

        result = self._put(update_host_component_url,
                           hac['ambari_info'], data=body)

        if result.status_code == 202:
            json_result = json.loads(result.text)
            request_id = json_result['Requests']['id']
            success = self._wait_for_async_request(self._get_async_request_uri(
                hac['ambari_info'], hac['name'], request_id),
                hac['ambari_info'])
            if success:
                LOG.info(_LI("HDFS-HA: Host component updated successfully: "
                             "{0} {1}").format(host, component))
            else:
                LOG.critical(_LC("HDFS-HA: Host component update failed: "
                                 "{0} {1}").format(host, component))
                raise ex.NameNodeHAConfigurationError(
                    'Configuring HDFS HA failed. %s' % result.text)
        elif result.status_code != 200:
            LOG.error(
                _LE('Configuring HDFS HA failed. {0}').format(result.text))
            raise ex.NameNodeHAConfigurationError(
                'Configuring HDFS HA failed. %s' % result.text)
Exemple #19
0
def _delete_volume(volume_id):
    """Look up a volume and delete it, logging any failure.

    Failures are logged with their traceback and are not propagated.

    :param volume_id: id of the volume to delete
    """
    LOG.debug("Deleting volume %s" % volume_id)
    doomed = cinder.get_volume(volume_id)
    try:
        doomed.delete()
    except Exception:
        message = _LE("Can't delete volume %s")
        LOG.exception(message, doomed.id)
Exemple #20
0
def _detach_volume(instance, volume_id):
    """Request a volume detach and poll until the volume is free.

    A failing detach request is logged but does not stop the wait. The
    volume is polled every two seconds until its status is 'available' or
    'error'; once ``CONF.detach_volume_timeout`` seconds have passed a
    warning with the last seen status is logged instead.

    :param instance: object exposing ``instance_name`` and ``instance_id``
    :param volume_id: id of the volume to detach
    """
    volume = cinder.get_volume(volume_id)
    try:
        LOG.debug("Detaching volume %s from instance %s",
                  volume_id, instance.instance_name)
        nova.client().volumes.delete_server_volume(instance.instance_id,
                                                   volume_id)
    except Exception:
        LOG.exception(_LE("Can't detach volume %s"), volume.id)

    detach_timeout = CONF.detach_volume_timeout
    LOG.debug("Waiting %d seconds to detach %s volume",
              detach_timeout, volume_id)
    s_time = tu.utcnow()
    while tu.delta_seconds(s_time, tu.utcnow()) < detach_timeout:
        volume = cinder.get_volume(volume_id)
        if volume.status in ('available', 'error'):
            LOG.debug("Volume %s has been detached", volume_id)
            return
        context.sleep(2)

    # Timed out (success leaves the loop via ``return``). ``warning`` is
    # used because ``warn`` is a deprecated alias.
    LOG.warning(
        _LW("Can't detach volume %(volume)s. "
            "Current status of volume: %(status)s"),
        {'volume': volume_id, 'status': volume.status})
Exemple #21
0
def execute_job(job_id, data):
    """Create a job execution record and submit it via ``OPS.run_edp_job``.

    The new execution id is stored in the context. If the execution
    requires a proxy user and creating one fails with a
    ``SaharaException``, the record is destroyed and the exception is
    re-raised.

    :param job_id: id of the job to execute
    :param data: mapping with a mandatory 'cluster_id' and optional
                 'job_configs', 'interface', 'input_id' and 'output_id'
    :returns: the created job execution object
    """
    # Elements common to all job types
    cluster_id = data['cluster_id']
    configs = data.get('job_configs', {})
    interface = data.get('interface', {})

    # Since we will use a unified class in the database, we pass
    # a superset for all job types. input_id/output_id are not in Java
    # job types but present for all others.
    job_ex_dict = {
        'input_id': data.get('input_id'),
        'output_id': data.get('output_id'),
        'job_id': job_id,
        'cluster_id': cluster_id,
        'info': {'status': edp.JOB_STATUS_PENDING},
        'job_configs': configs,
        'extra': {},
        'interface': interface,
    }
    job_execution = conductor.job_execution_create(context.ctx(), job_ex_dict)
    context.set_current_job_execution_id(job_execution.id)

    # check to use proxy user
    needs_proxy = p.job_execution_requires_proxy_user(job_execution)
    if needs_proxy:
        try:
            p.create_proxy_user_for_job_execution(job_execution)
        except ex.SaharaException as e:
            failure = _LE("Can't run job execution. "
                          "(Reasons: {reason})").format(reason=e)
            LOG.error(failure)
            conductor.job_execution_destroy(context.ctx(), job_execution)
            raise e

    OPS.run_edp_job(job_execution.id)

    return job_execution
Exemple #22
0
    def invoke(self, method, relpath=None, params=None, data=None,
               headers=None):
        """Invoke an API method

        :param method: HTTP method passed to the underlying client.
        :param relpath: path joined onto the resource's base URI.
        :param params: query parameters passed to the client.
        :param data: request payload passed to the client.
        :param headers: request headers passed to the client.
        :return: Raw body or JSON dictionary (if response content type is
                 JSON).
        :raises CMApiException: if the response body cannot be read.
        """
        path = self._join_uri(relpath)
        resp = self._client.execute(method,
                                    path,
                                    params=params,
                                    data=data,
                                    headers=headers)
        try:
            body = resp.read()
        except Exception as e:
            # The caught exception must not be bound to ``ex``: that name
            # refers to the exceptions module, and rebinding it would turn
            # ``ex.CMApiException`` into an attribute lookup on the caught
            # exception (AttributeError) instead of raising the API error.
            raise ex.CMApiException(
                _("Command %(method)s %(path)s failed: %(msg)s")
                % {'method': method, 'path': path, 'msg': six.text_type(e)})

        LOG.debug("{method} got response: {body}".format(method=method,
                                                         body=body[:32]))
        # Is the response application/json?
        if (len(body) != 0 and resp.info().getmaintype() == "application"
                and resp.info().getsubtype() == "json"):
            try:
                return json.loads(body)
            except Exception:
                LOG.error(_LE('JSON decode error: {body}').format(body=body))
                # Bare raise keeps the original traceback.
                raise
        else:
            return body
    def _migrate_up(self, engine, version, with_data=False):
        """migrate up to a new version of the db.

        We allow for data insertion and post checks at every
        migration version with special _pre_upgrade_### and
        _check_### functions in the main test.
        """
        # NOTE(sdague): try block is here because it's impossible to debug
        # where a failed data migration happens otherwise
        check_version = version
        try:
            if with_data:
                data = None
                pre_upgrade = getattr(
                    self, "_pre_upgrade_%s" % check_version, None)
                if pre_upgrade:
                    data = pre_upgrade(engine)
            self._migrate(engine, version, 'upgrade')
            self.assertEqual(version, self._get_version_from_db(engine))
            if with_data:
                check = getattr(self, "_check_%s" % check_version, None)
                if check:
                    check(engine, data)
        except Exception:
            LOG.error(_LE("Failed to migrate to version {version} on engine "
                      "{engine}").format(version=version, engine=engine))
            raise
Exemple #24
0
def _delete_volume(volume_id):
    """Look up a volume and delete it through ``b.execute_with_retries``.

    A failure is logged as an error and not propagated.

    :param volume_id: id of the volume to delete
    """
    LOG.debug("Deleting volume {volume}".format(volume=volume_id))
    doomed = cinder.get_volume(volume_id)
    try:
        b.execute_with_retries(doomed.delete)
    except Exception:
        message = _LE("Can't delete volume {volume}")
        LOG.error(message.format(volume=doomed.id))
Exemple #25
0
    def _build_proxy_command(self,
                             command,
                             instance=None,
                             port=None,
                             info=None,
                             rootwrap_command=None):
        """Render the proxy command template for an instance.

        Accepted keywords in the proxy command template:
        {host}, {port}, {tenant_id}, {network_id}, {router_id}

        :param command: template string containing the keywords above
        :param instance: instance whose ``management_ip`` becomes {host}
        :param port: value substituted for {port}
        :param info: neutron info mapping; looked up from ``instance`` when
                     not given
        :param rootwrap_command: if set, prepended to the final command
        :returns: the rendered command string
        :raises SystemError: if the template uses an unknown keyword
        """
        if not info:
            info = self.get_neutron_info(instance)

        keywords = {
            'tenant_id': context.current().tenant_id,
            'network_id': info['network'],
        }

        # Query Neutron only if needed
        if '{router_id}' in command:
            client = neutron.NeutronClient(info['network'], info['uri'],
                                           info['token'], info['tenant'])
            keywords['router_id'] = client.get_router()

        keywords['host'] = instance.management_ip
        keywords['port'] = port

        try:
            rendered = command.format(**keywords)
        except KeyError as e:
            message = _LE('Invalid keyword in proxy_command: {result}')
            LOG.error(message.format(result=e))
            # Do not give more details to the end-user
            raise ex.SystemError('Misconfiguration')

        if rootwrap_command:
            return '{0} {1}'.format(rootwrap_command, rendered)
        return rendered
Exemple #26
0
def create_trust(trustor,
                 trustee,
                 role_names,
                 impersonation=True,
                 project_id=None):
    '''Create a trust and return its identifier

    :param trustor: The Keystone client delegating the trust.
    :param trustee: The Keystone client consuming the trust.
    :param role_names: A list of role names to be assigned.
    :param impersonation: Should the trustee impersonate trustor,
                          default is True.
    :param project_id: The project that the trust will be scoped into,
                       default is the trustor's project id.
    :returns: A valid trust id.
    :raises CreationFailed: If the trust cannot be created.

    '''
    target_project = project_id
    if target_project is None:
        target_project = trustor.tenant_id

    try:
        trust = trustor.trusts.create(
            trustor_user=trustor.user_id,
            trustee_user=trustee.user_id,
            impersonation=impersonation,
            role_names=role_names,
            project=target_project)
        created_id = six.text_type(trust.id)
        LOG.debug('Created trust {0}'.format(created_id))
        return trust.id
    except Exception as e:
        # Any failure is reported to the caller as CreationFailed.
        failure = _LE('Unable to create trust (reason: %s)')
        LOG.exception(failure, e)
        raise ex.CreationFailed(_('Failed to create trust'))
Exemple #27
0
    def _exec_ambari_command(self, ambari_info, body, cmd_uri):
        """PUT a command to Ambari and wait for the resulting tasks.

        Only a 202 reply is accepted; the ``href`` it returns is polled for
        task status until the asynchronous request finishes.

        :param ambari_info: forwarded to the HTTP and polling helpers
        :param body: request payload
        :param cmd_uri: URI the command is PUT to
        :raises HadoopProvisionError: if the reply is not 202 or the
                                      asynchronous request fails
        """
        LOG.debug('PUT URI: {0}'.format(cmd_uri))
        response = self._put(cmd_uri, ambari_info, data=body)

        if response.status_code != 202:
            LOG.error(
                _LE('Command failed. Status: %(status)s, response: '
                    '%(response)s'),
                {'status': response.status_code, 'response': response.text})
            raise ex.HadoopProvisionError(_('Hadoop/Ambari command failed.'))

        LOG.debug('PUT response: {0}'.format(response.text))
        tasks_href = (json.loads(response.text)['href']
                      + '/tasks?fields=Tasks/status')
        if self._wait_for_async_request(tasks_href, ambari_info):
            LOG.info(
                _LI("Successfully changed state of Hadoop components "))
            return

        LOG.critical(_LC('Failed to change state of Hadoop '
                         'components'))
        raise ex.HadoopProvisionError(
            _('Failed to change state of Hadoop components'))
Exemple #28
0
def create_trust(trustor,
                 trustee,
                 role_names,
                 impersonation=True,
                 project_id=None):
    """Create a trust and return its identifier.

    :param trustor: The Keystone client delegating the trust.
    :param trustee: The Keystone client consuming the trust.
    :param role_names: A list of role names to be assigned.
    :param impersonation: Whether the trustee impersonates the trustor,
                          default is True.
    :param project_id: The project the trust is scoped to; when omitted,
                       the trustor's ``tenant_id`` is used.
    :returns: The id of the created trust.
    :raises CreationFailed: If creating the trust raises any exception.

    """
    scope = trustor.tenant_id if project_id is None else project_id
    try:
        create = trustor.trusts.create
        new_trust = create(trustor_user=trustor.user_id,
                           trustee_user=trustee.user_id,
                           impersonation=impersonation,
                           role_names=role_names,
                           project=scope)
        LOG.debug('Created trust {0}'.format(six.text_type(new_trust.id)))
        return new_trust.id
    except Exception as e:
        # Any failure is logged with its traceback and reported uniformly.
        LOG.exception(_LE('Unable to create trust (reason: %s)'), e)
        raise ex.CreationFailed(_('Failed to create trust'))
Exemple #29
0
def execute_with_retries(method, *args, **kwargs):
    """Call ``method`` and retry it while it fails with a retryable error.

    The call is attempted up to ``CONF.retries.retries_number + 1`` times.
    An exception is considered retryable when its ``http_status``,
    ``status_code`` or ``code`` attribute (first truthy one wins) is listed
    in ``ERRORS_TO_RETRY``.  Between attempts the function sleeps for the
    larger of the exception's ``retry_after`` attribute (0 when absent) and
    ``CONF.retries.retry_after``.

    :param method: Callable to execute; its ``__name__`` is used in logs.
    :param args: Positional arguments passed to ``method``.
    :param kwargs: Keyword arguments passed to ``method``.
    :returns: Whatever ``method`` returns on the first successful call.
    :raises MaxRetriesExceeded: If every attempt failed with a retryable
                                error.
    :raises Exception: The original exception, for non-retryable errors.
    """
    # Keep the configured total separately: the countdown below reaches 0
    # by the time the retries are exhausted, so it cannot be reported.
    max_attempts = CONF.retries.retries_number + 1
    attempts_left = max_attempts
    while attempts_left > 0:
        try:
            return method(*args, **kwargs)
        except Exception as e:
            error_code = (getattr(e, 'http_status', None) or
                          getattr(e, 'status_code', None) or
                          getattr(e, 'code', None))
            if error_code not in ERRORS_TO_RETRY:
                LOG.error(_LE('Permanent error occured during "{method}" '
                              'execution: {error_msg}.').format(
                          method=method.__name__,
                          error_msg=e))
                # Bare raise keeps the original traceback intact.
                raise

            LOG.warning(_LW('Occasional error occured during "{method}" '
                            'execution: {error_msg} ({error_code}). '
                            'Operation will be retried.').format(
                        method=method.__name__,
                        error_msg=e,
                        error_code=error_code))
            attempts_left -= 1
            retry_after = getattr(e, 'retry_after', 0)
            context.sleep(max(retry_after, CONF.retries.retry_after))

    raise ex.MaxRetriesExceeded(max_attempts, method.__name__)
Exemple #30
0
        def wrapper(cluster_id, *args, **kwds):
            """Run ``f`` on a cluster and roll the cluster back if it fails.

            The status description is cleared first.  When ``f`` raises,
            the error is logged and ``_rollback_cluster`` is tried: a
            truthy result moves the cluster to "Active", anything else to
            "Error".  A failure of the rollback itself also ends in
            "Error".  Nothing more is done once the cluster no longer
            exists or is in the 'Deleting' status.

            :param cluster_id: Id of the cluster ``f`` operates on.
            """
            ctx = context.ctx()

            def _cancelled(current):
                # True (after a debug record) when the cluster is gone or
                # is being deleted.
                if current is None or current.status == 'Deleting':
                    LOG.debug(
                        "Cluster id={id} was deleted or marked for "
                        "deletion. Canceling current operation.".format(
                            id=cluster_id))
                    return True
                return False

            try:
                # Every operation starts with an empty status description.
                g.change_cluster_status_description(cluster_id, "")
                f(cluster_id, *args, **kwds)
            except Exception as ex:
                # The operation failed: re-read the cluster before reacting.
                cluster = conductor.cluster_get(ctx, cluster_id)
                if _cancelled(cluster):
                    return

                msg = six.text_type(ex)
                LOG.error(
                    _LE("Error during operating on cluster {name} (reason: "
                        "{reason})").format(name=cluster.name, reason=msg))

                try:
                    desc = description.format(reason=msg)
                    rolled_back = _rollback_cluster(cluster, ex)
                    new_status = "Active" if rolled_back else "Error"
                    g.change_cluster_status(cluster, new_status, desc)
                except Exception as rex:
                    # The cluster may have been removed during the rollback.
                    cluster = conductor.cluster_get(ctx, cluster_id)
                    if _cancelled(cluster):
                        return

                    rollback_msg = six.text_type(rex)
                    LOG.error(
                        _LE("Error during rollback of cluster {name} (reason:"
                            " {reason})").format(name=cluster.name,
                                                 reason=rollback_msg))
                    desc = "{0}, {1}".format(msg, rollback_msg)
                    g.change_cluster_status(
                        cluster, "Error", description.format(reason=desc))
Exemple #31
0
        def wrapper(cluster_id, *args, **kwds):
            """Run ``f`` on a cluster and roll the cluster back if it fails.

            The status description is cleared first.  When ``f`` raises,
            the error is logged with its traceback and
            ``_rollback_cluster`` is tried: a truthy result sets
            ``CLUSTER_STATUS_ACTIVE``, anything else
            ``CLUSTER_STATUS_ERROR``.  A failure of the rollback itself
            also ends in ``CLUSTER_STATUS_ERROR``.  Nothing more is done
            once the cluster no longer exists or has
            ``CLUSTER_STATUS_DELETING``.

            :param cluster_id: Id of the cluster ``f`` operates on.
            """
            ctx = context.ctx()

            def _is_going_away(current):
                # The cluster was removed or is marked for deletion.
                return (current is None or
                        current.status == c_u.CLUSTER_STATUS_DELETING)

            try:
                # Every operation starts with an empty status description.
                c_u.change_cluster_status_description(cluster_id, "")
                f(cluster_id, *args, **kwds)
            except Exception as ex:
                # The operation failed: re-read the cluster before reacting.
                cluster = conductor.cluster_get(ctx, cluster_id)
                if _is_going_away(cluster):
                    LOG.debug("Cluster was deleted or marked for deletion. "
                              "Canceling current operation.")
                    return

                msg = six.text_type(ex)
                LOG.exception(_LE("Error during operating on cluster (reason: "
                                  "{reason})").format(reason=msg))

                try:
                    desc = description.format(reason=msg)
                    if _rollback_cluster(cluster, ex):
                        new_status = c_u.CLUSTER_STATUS_ACTIVE
                    else:
                        new_status = c_u.CLUSTER_STATUS_ERROR
                    c_u.change_cluster_status(cluster, new_status, desc)
                except Exception as rex:
                    # The cluster may have been removed during the rollback.
                    cluster = conductor.cluster_get(ctx, cluster_id)
                    if _is_going_away(cluster):
                        LOG.debug("Cluster was deleted or marked for deletion."
                                  " Canceling current operation.")
                        return

                    rollback_msg = six.text_type(rex)
                    LOG.exception(
                        _LE("Error during rollback of cluster (reason:"
                            " {reason})").format(reason=rollback_msg))
                    combined = "{0}, {1}".format(msg, rollback_msg)
                    c_u.change_cluster_status(
                        cluster, c_u.CLUSTER_STATUS_ERROR,
                        description.format(reason=combined))
Exemple #32
0
def setup_db():
    """Run ``create_all`` on the ``Cluster`` model metadata.

    :returns: True when the call succeeded, False when it raised
              ``sa.exc.OperationalError`` (which is logged, not re-raised).
    """
    succeeded = True
    try:
        db_engine = get_engine()
        m.Cluster.metadata.create_all(db_engine)
    except sa.exc.OperationalError as e:
        LOG.exception(_LE("Database registration exception: %s"), e)
        succeeded = False
    return succeeded
Exemple #33
0
def setup_db():
    """Run ``create_all`` on the ``Cluster`` model metadata.

    :returns: True when the call succeeded, False when it raised
              ``sa.exc.OperationalError`` (which is logged, not re-raised).
    """
    succeeded = True
    try:
        db_engine = get_engine()
        m.Cluster.metadata.create_all(db_engine)
    except sa.exc.OperationalError as e:
        LOG.exception(_LE("Database registration exception: %s"), e)
        succeeded = False
    return succeeded
Exemple #34
0
def drop_db():
    """Run ``drop_all`` on the ``Cluster`` model metadata.

    :returns: True when the call succeeded, False when any exception was
              raised (the exception is logged, not re-raised).
    """
    succeeded = True
    try:
        db_engine = get_engine()
        m.Cluster.metadata.drop_all(db_engine)
    except Exception as e:
        LOG.exception(_LE("Database shutdown exception: %s"), e)
        succeeded = False
    return succeeded
Exemple #35
0
def drop_db():
    """Run ``drop_all`` on the ``Cluster`` model metadata.

    :returns: True when the call succeeded, False when any exception was
              raised (the exception is logged, not re-raised).
    """
    succeeded = True
    try:
        db_engine = get_engine()
        m.Cluster.metadata.drop_all(db_engine)
    except Exception as e:
        LOG.exception(_LE("Database shutdown exception: %s"), e)
        succeeded = False
    return succeeded
def update_job_statuses():
    """Call ``get_job_status`` for every job execution with no end time.

    The executions are those the conductor returns for ``end_time=None``
    (presumably the ones still running; verify against the conductor).
    A failure for one execution is logged and does not stop the loop.
    """
    current_ctx = context.ctx()
    executions = conductor.job_execution_get_all(current_ctx, end_time=None)
    for job_execution in executions:
        try:
            get_job_status(job_execution.id)
        except Exception as e:
            failure = _LE(
                "Error during update job execution {job}: {error}").format(
                    job=job_execution.id, error=e)
            LOG.error(failure)
Exemple #37
0
def _delete_volume(volume_id):
    """Delete a volume fetched through ``cinder``; failures are only logged.

    :param volume_id: Id used to look the volume up.
    """
    LOG.debug("Deleting volume {volume}".format(volume=volume_id))
    target = cinder.get_volume(volume_id)
    try:
        target.delete()
    except Exception:
        # Best effort: a failed delete is reported but never propagated.
        failure = _LE("Can't delete volume {volume}").format(
            volume=target.id)
        LOG.error(failure)
Exemple #38
0
def validate_cluster_creating(cluster):
    """Validate the component layout of a cluster before it is created.

    Checks are made in this order and the first violation raises:

    * the 'cm_api' libraries must be available;
    * exactly one MANAGER, NAMENODE and SECONDARYNAMENODE;
    * at most one RESOURCEMANAGER and at most one JOBHISTORY;
    * a RESOURCEMANAGER requires a JOBHISTORY;
    * a NODEMANAGER requires a RESOURCEMANAGER;
    * at most one OOZIE_SERVER, which requires at least one DATANODE,
      at least one NODEMANAGER and exactly one JOBHISTORY.

    :param cluster: Cluster whose instance counts are inspected through
                    ``_get_inst_count``.
    :raises HadoopProvisionError: If 'cm_api' is not installed.
    :raises InvalidComponentCountException: If a component count is
                                            outside its allowed range.
    :raises RequiredServiceMissingException: If a component is present
                                             without one it depends on.
    """
    if not cmu.have_cm_api_libs():
        # The trailing space matters: without it the adjacent literals
        # are joined as "install'cm_api'".
        LOG.error(
            _LE("For provisioning cluster with CDH plugin install "
                "'cm_api' package version 6.0.2 or later."))
        raise ex.HadoopProvisionError(_("'cm_api' is not installed."))

    mng_count = _get_inst_count(cluster, 'MANAGER')
    if mng_count != 1:
        raise ex.InvalidComponentCountException('MANAGER', 1, mng_count)

    nn_count = _get_inst_count(cluster, 'NAMENODE')
    if nn_count != 1:
        raise ex.InvalidComponentCountException('NAMENODE', 1, nn_count)

    snn_count = _get_inst_count(cluster, 'SECONDARYNAMENODE')
    if snn_count != 1:
        raise ex.InvalidComponentCountException('SECONDARYNAMENODE', 1,
                                                snn_count)

    rm_count = _get_inst_count(cluster, 'RESOURCEMANAGER')
    if rm_count not in [0, 1]:
        raise ex.InvalidComponentCountException('RESOURCEMANAGER', '0 or 1',
                                                rm_count)

    hs_count = _get_inst_count(cluster, 'JOBHISTORY')
    if hs_count not in [0, 1]:
        raise ex.InvalidComponentCountException('JOBHISTORY', '0 or 1',
                                                hs_count)

    if rm_count > 0 and hs_count < 1:
        raise ex.RequiredServiceMissingException('JOBHISTORY',
                                                 required_by='RESOURCEMANAGER')

    nm_count = _get_inst_count(cluster, 'NODEMANAGER')
    if rm_count == 0 and nm_count > 0:
        raise ex.RequiredServiceMissingException('RESOURCEMANAGER',
                                                 required_by='NODEMANAGER')

    oo_count = _get_inst_count(cluster, 'OOZIE_SERVER')
    dn_count = _get_inst_count(cluster, 'DATANODE')
    if oo_count not in [0, 1]:
        raise ex.InvalidComponentCountException('OOZIE_SERVER', '0 or 1',
                                                oo_count)

    if oo_count == 1:
        if dn_count < 1:
            raise ex.RequiredServiceMissingException(
                'DATANODE', required_by='OOZIE_SERVER')

        if nm_count < 1:
            raise ex.RequiredServiceMissingException(
                'NODEMANAGER', required_by='OOZIE_SERVER')

        if hs_count != 1:
            raise ex.RequiredServiceMissingException(
                'JOBHISTORY', required_by='OOZIE_SERVER')
Exemple #39
0
    def wrapper(cluster_id, *args, **kwds):
        """Run ``f`` on a cluster and roll the cluster back if it fails.

        When ``f`` raises, the error is logged and ``_rollback_cluster``
        is tried: a truthy result moves the cluster to "Active", anything
        else to "Error".  A failure of the rollback itself also ends in
        "Error".  Nothing more is done when the cluster no longer exists
        or, before the rollback, is in the 'Deleting' status.

        :param cluster_id: Id of the cluster ``f`` operates on.
        """
        try:
            f(cluster_id, *args, **kwds)
        except Exception as ex:
            # The operation failed: re-read the cluster before reacting.
            ctx = context.ctx()
            cluster = conductor.cluster_get(ctx, cluster_id)

            gone_or_deleting = cluster is None or cluster.status == 'Deleting'
            if gone_or_deleting:
                LOG.info(
                    _LI("Cluster %s was deleted or marked for deletion. "
                        "Canceling current operation."), cluster_id)
                return

            operation_failure = {'name': cluster.name, 'reason': ex}
            LOG.exception(
                _LE("Error during operating cluster '%(name)s' "
                    "(reason: %(reason)s)"), operation_failure)

            try:
                rolled_back = _rollback_cluster(cluster, ex)
                g.change_cluster_status(
                    cluster, "Active" if rolled_back else "Error")
            except Exception as rex:
                # The cluster may have been removed during the rollback;
                # here only its absence cancels the operation.
                cluster = conductor.cluster_get(ctx, cluster_id)
                if cluster is None:
                    LOG.info(
                        _LI("Cluster with %s was deleted. "
                            "Canceling current operation."), cluster_id)
                    return

                rollback_failure = {'name': cluster.name, 'reason': rex}
                LOG.exception(
                    _LE("Error during rollback of cluster '%(name)s' "
                        "(reason: %(reason)s)"), rollback_failure)

                g.change_cluster_status(cluster, "Error")
Exemple #40
0
def update_job_statuses():
    """Call ``get_job_status`` for every job execution with no end time.

    The executions are those the conductor returns for ``end_time=None``
    (presumably the ones still running; verify against the conductor).
    A failure for one execution is logged with its traceback and does not
    stop the loop.
    """
    current_ctx = context.ctx()
    executions = conductor.job_execution_get_all(current_ctx, end_time=None)
    for job_execution in executions:
        try:
            get_job_status(job_execution.id)
        except Exception as e:
            details = {'job': job_execution.id, 'error': e}
            LOG.exception(
                _LE("Error during update job execution %(job)s: %(error)s"),
                details)
Exemple #41
0
def validate_cluster_creating(cluster):
    """Validate the component layout of a cluster before it is created.

    Checks are made in this order and the first violation raises:

    * the 'cm_api' libraries must be available;
    * exactly one MANAGER, NAMENODE and SECONDARYNAMENODE;
    * at most one RESOURCEMANAGER and at most one JOBHISTORY;
    * a RESOURCEMANAGER requires a JOBHISTORY;
    * a NODEMANAGER requires a RESOURCEMANAGER;
    * at most one OOZIE_SERVER, which requires at least one DATANODE,
      at least one NODEMANAGER and exactly one JOBHISTORY.

    :param cluster: Cluster whose instance counts are inspected through
                    ``_get_inst_count``.
    :raises HadoopProvisionError: If 'cm_api' is not installed.
    :raises InvalidComponentCountException: If a component count is
                                            outside its allowed range.
    :raises RequiredServiceMissingException: If a component is present
                                             without one it depends on.
    """
    if not cmu.have_cm_api_libs():
        # The trailing space matters: without it the adjacent literals
        # are joined as "install'cm_api'".
        LOG.error(_LE("For provisioning cluster with CDH plugin install "
                      "'cm_api' package version 6.0.2 or later."))
        raise ex.HadoopProvisionError(_("'cm_api' is not installed."))

    mng_count = _get_inst_count(cluster, 'MANAGER')
    if mng_count != 1:
        raise ex.InvalidComponentCountException('MANAGER', 1, mng_count)

    nn_count = _get_inst_count(cluster, 'NAMENODE')
    if nn_count != 1:
        raise ex.InvalidComponentCountException('NAMENODE', 1, nn_count)

    snn_count = _get_inst_count(cluster, 'SECONDARYNAMENODE')
    if snn_count != 1:
        raise ex.InvalidComponentCountException('SECONDARYNAMENODE', 1,
                                                snn_count)

    rm_count = _get_inst_count(cluster, 'RESOURCEMANAGER')
    if rm_count not in [0, 1]:
        raise ex.InvalidComponentCountException('RESOURCEMANAGER', '0 or 1',
                                                rm_count)

    hs_count = _get_inst_count(cluster, 'JOBHISTORY')
    if hs_count not in [0, 1]:
        raise ex.InvalidComponentCountException('JOBHISTORY', '0 or 1',
                                                hs_count)

    if rm_count > 0 and hs_count < 1:
        raise ex.RequiredServiceMissingException('JOBHISTORY',
                                                 required_by='RESOURCEMANAGER')

    nm_count = _get_inst_count(cluster, 'NODEMANAGER')
    if rm_count == 0 and nm_count > 0:
        raise ex.RequiredServiceMissingException('RESOURCEMANAGER',
                                                 required_by='NODEMANAGER')

    oo_count = _get_inst_count(cluster, 'OOZIE_SERVER')
    dn_count = _get_inst_count(cluster, 'DATANODE')
    if oo_count not in [0, 1]:
        raise ex.InvalidComponentCountException('OOZIE_SERVER', '0 or 1',
                                                oo_count)

    if oo_count == 1:
        if dn_count < 1:
            raise ex.RequiredServiceMissingException(
                'DATANODE', required_by='OOZIE_SERVER')

        if nm_count < 1:
            raise ex.RequiredServiceMissingException(
                'NODEMANAGER', required_by='OOZIE_SERVER')

        if hs_count != 1:
            raise ex.RequiredServiceMissingException(
                'JOBHISTORY', required_by='OOZIE_SERVER')
Exemple #42
0
def abort_and_log(status_code, descr, exc=None):
    """Log why a request is being aborted, then abort it through Flask.

    :param status_code: HTTP status code handed to ``flask.abort``.
    :param descr: Text that is logged and passed as the abort description.
    :param exc: When not ``None``, the output of
                ``traceback.format_exc()`` is logged as well.
    """
    log_fields = {'code': status_code, 'message': descr}
    LOG.error(_LE("Request aborted with status code %(code)s and "
                  "message '%(message)s'"), log_fields)

    with_traceback = exc is not None
    if with_traceback:
        LOG.error(traceback.format_exc())

    flask.abort(status_code, description=descr)
Exemple #43
0
def abort_and_log(status_code, descr, exc=None):
    """Log why a request is being aborted, then abort it through Flask.

    :param status_code: HTTP status code handed to ``flask.abort``.
    :param descr: Text that is logged and passed as the abort description.
    :param exc: When not ``None``, the output of
                ``traceback.format_exc()`` is logged as well.
    """
    log_fields = {'code': status_code, 'message': descr}
    LOG.error(_LE("Request aborted with status code %(code)s and "
                  "message '%(message)s'"), log_fields)

    with_traceback = exc is not None
    if with_traceback:
        LOG.error(traceback.format_exc())

    flask.abort(status_code, description=descr)
Exemple #44
0
 def _instance_get_data(self, instance, lock):
     """Store the instance's resolv.conf data in ``self._data``.

     The data is read with ``self._get_resolv_conf`` over the instance's
     remote; if that raises, the failure is logged and ``None`` is stored.
     The entry is keyed by ``instance.get_ip_or_dns_name()`` and written
     while ``lock`` is held.

     :param instance: Instance to read from.
     :param lock: Context manager guarding writes to ``self._data``.
     """
     try:
         with instance.remote() as remote:
             resolv_conf = self._get_resolv_conf(remote)
     except Exception:
         LOG.exception(_LE("Couldn't read '/etc/resolv.conf'"))
         # Record the failure as a missing value instead of propagating.
         resolv_conf = None

     with lock:
         self._data[instance.get_ip_or_dns_name()] = resolv_conf
Exemple #45
0
def abort_and_log(status_code, descr, exc=None):
    """Log why a request is being aborted, then abort it through Flask.

    :param status_code: HTTP status code handed to ``flask.abort``.
    :param descr: Text that is logged and passed as the abort description.
    :param exc: When not ``None``, the output of
                ``traceback.format_exc()`` is logged as well.
    """
    summary = _LE("Request aborted with status code {code} and "
                  "message '{message}'").format(code=status_code,
                                                message=descr)
    LOG.error(summary)

    with_traceback = exc is not None
    if with_traceback:
        LOG.error(traceback.format_exc())

    flask.abort(status_code, description=descr)
 def check_health(self):
     """Report whether the provider lists any instances as a problem.

     ``self.provider.get_accessibility_data()`` is expected to return the
     IPs or names of instances that could not be reached (assumption based
     on how the result is used here; confirm against the provider).

     :returns: A translated "all available" message when the list is empty.
     :raises RedHealthError: If the provider returned any instances.
     """
     inst_ips_or_names = self.provider.get_accessibility_data()
     if inst_ips_or_names:
         insts = ', '.join(inst_ips_or_names)
         # No exception is being handled at this point, so LOG.exception
         # would append a meaningless "NoneType: None" traceback; log a
         # plain error and let the logger interpolate the argument.
         LOG.error(
             _LE("Instances (%s) are not available in the cluster"), insts)
         raise RedHealthError(
             _("Instances (%s) are not available in the cluster.") % insts)
     return _("All instances are available")
Exemple #47
0
 def check_health(self):
     """Report whether the provider lists any instances as a problem.

     ``self.provider.get_accessibility_data()`` is expected to return the
     IPs or names of instances that could not be reached (assumption based
     on how the result is used here; confirm against the provider).

     :returns: A translated "all available" message when the list is empty.
     :raises RedHealthError: If the provider returned any instances.
     """
     inst_ips_or_names = self.provider.get_accessibility_data()
     if inst_ips_or_names:
         insts = ', '.join(inst_ips_or_names)
         # No exception is being handled at this point, so LOG.exception
         # would append a meaningless "NoneType: None" traceback; log a
         # plain error and let the logger interpolate the argument.
         LOG.error(
             _LE("Instances (%s) are not available in the cluster"), insts)
         raise RedHealthError(
             _("Instances (%s) are not available in the cluster.") % insts)
     return _("All instances are available")
Exemple #48
0
        def hup(*args):
            """Handle SIGHUP by forwarding it to the whole process group.

            The original note on this handler reads: shuts down the
            server(s), but allows running requests to complete.  That
            behaviour is not implemented here; it presumably depends on
            how the other processes in the group react to SIGHUP.

            :param args: Signal handler arguments; unused.
            """
            LOG.error(_LE('SIGHUP received'))
            # Ignore SIGHUP in this process while the group is signalled,
            # so the signal sent below does not re-enter this handler.
            signal.signal(signal.SIGHUP, signal.SIG_IGN)
            # Process group 0 means the caller's own process group.
            os.killpg(0, signal.SIGHUP)
            # Re-arm this handler for the next SIGHUP.
            signal.signal(signal.SIGHUP, hup)
Exemple #49
0
        def hup(*args):
            """Handle SIGHUP by forwarding it to the whole process group.

            The original note on this handler reads: shuts down the
            server(s), but allows running requests to complete.  That
            behaviour is not implemented here; it presumably depends on
            how the other processes in the group react to SIGHUP.

            :param args: Signal handler arguments; unused.
            """
            LOG.error(_LE('SIGHUP received'))
            # Ignore SIGHUP in this process while the group is signalled,
            # so the signal sent below does not re-enter this handler.
            signal.signal(signal.SIGHUP, signal.SIG_IGN)
            # Process group 0 means the caller's own process group.
            os.killpg(0, signal.SIGHUP)
            # Re-arm this handler for the next SIGHUP.
            signal.signal(signal.SIGHUP, hup)
Exemple #50
0
def update_job_statuses():
    """Call ``get_job_status`` for every job execution with no end time.

    The executions are those the conductor returns for ``end_time=None``
    (presumably the ones still running; verify against the conductor).
    A failure for one execution is logged and does not stop the loop.
    """
    current_ctx = context.ctx()
    executions = conductor.job_execution_get_all(current_ctx, end_time=None)
    for job_execution in executions:
        try:
            get_job_status(job_execution.id)
        except Exception as e:
            failure = _LE(
                "Error during update job execution {job}: {error}").format(
                    job=job_execution.id, error=e)
            LOG.error(failure)
Exemple #51
0
def update_job_statuses():
    """Call ``get_job_status`` for every job execution with no end time.

    The executions are those the conductor returns for ``end_time=None``
    (presumably the ones still running; verify against the conductor).
    A failure for one execution is logged with its traceback and does not
    stop the loop.
    """
    current_ctx = context.ctx()
    executions = conductor.job_execution_get_all(current_ctx, end_time=None)
    for job_execution in executions:
        try:
            get_job_status(job_execution.id)
        except Exception as e:
            details = {'job': job_execution.id, 'error': e}
            LOG.exception(
                _LE("Error during update job execution %(job)s: %(error)s"),
                details)
Exemple #52
0
def abort_and_log(status_code, descr, exc=None):
    """Log why a request is being aborted, then abort it through Flask.

    :param status_code: HTTP status code handed to ``flask.abort``.
    :param descr: Text that is logged and passed as the abort description.
    :param exc: When not ``None``, the output of
                ``traceback.format_exc()`` is logged as well.
    """
    summary = _LE("Request aborted with status code {code} and "
                  "message '{message}'").format(code=status_code,
                                                message=descr)
    LOG.error(summary)

    with_traceback = exc is not None
    if with_traceback:
        LOG.error(traceback.format_exc())

    flask.abort(status_code, description=descr)
 def _instance_get_data(self, instance, lock):
     """Store the instance's resolv.conf data in ``self._data``.

     The data is read with ``self._get_resolv_conf`` over the instance's
     remote; if that raises, the failure is logged and ``None`` is stored.
     The entry is keyed by ``instance.get_ip_or_dns_name()`` and written
     while ``lock`` is held.

     :param instance: Instance to read from.
     :param lock: Context manager guarding writes to ``self._data``.
     """
     try:
         with instance.remote() as remote:
             resolv_conf = self._get_resolv_conf(remote)
     except Exception:
         LOG.exception(_LE("Couldn't read '/etc/resolv.conf'"))
         # Record the failure as a missing value instead of propagating.
         resolv_conf = None

     with lock:
         self._data[instance.get_ip_or_dns_name()] = resolv_conf
Exemple #54
0
def bad_request(error):
    """Log a validation error and render it as a 400 response.

    :param error: Object exposing ``message`` and ``code`` attributes.
    :returns: The result of ``render_error_message`` for status 400.
    """
    error_code = 400
    message = error.message
    name = error.code

    log_line = _LE("Validation Error occurred: "
                   "error_code={code}, error_message={message}, "
                   "error_name={name}").format(code=error_code,
                                               message=message,
                                               name=name)
    LOG.error(log_line)

    return render_error_message(error_code, message, name)
Exemple #55
0
def access_denied(error):
    """Log an access-denied error and render it as a 403 response.

    :param error: Object exposing ``message`` and ``code`` attributes.
    :returns: The result of ``render_error_message`` for status 403.
    """
    error_code = 403
    message = error.message
    name = error.code

    log_line = _LE("Access Denied: "
                   "error_code={code}, error_message={message}, "
                   "error_name={name}").format(code=error_code,
                                               message=message,
                                               name=name)
    LOG.error(log_line)

    return render_error_message(error_code, message, name)
Exemple #56
0
def not_found(error):
    """Log a not-found error and render it as a 404 response.

    :param error: Object exposing ``message`` and ``code`` attributes.
    :returns: The result of ``render_error_message`` for status 404.
    """
    error_code = 404
    message = error.message
    name = error.code

    log_line = _LE("Not Found exception occurred: "
                   "error_code={code}, error_message={message}, "
                   "error_name={name}").format(code=error_code,
                                               message=message,
                                               name=name)
    LOG.error(log_line)

    return render_error_message(error_code, message, name)