Example #1
    def get(self, relpath=None, params=None):
        """Invoke the GET method on a resource

        :param relpath: Optional. A path relative to this resource's path.
        :param params: Key-value data.

        :return: A dictionary of the JSON result.
        """
        for retry in six.moves.xrange(self.retries + 1):
            if retry:
                context.sleep(self.retry_sleep)
            try:
                return self.invoke("GET", relpath, params)
            except (socket.error, urllib.error.URLError) as e:
                if "timed out" in six.text_type(e).lower():
                    if retry < self.retries:
                        LOG.warning("Timeout issuing GET request for "
                                    "{path}. Will retry".format(
                                        path=self._join_uri(relpath)))
                    else:
                        LOG.warning("Timeout issuing GET request for "
                                    "{path}. No retries left".format(
                                        path=self._join_uri(relpath)))
                else:
                    raise
        else:
            raise ex.CMApiException(_("Get retry max time reached."))
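A minimal usage sketch for the method above. The `resource` object and the "clusters" path are illustrative assumptions; `ex.CMApiException` and `LOG` come from the example itself:

# Hypothetical caller; `resource` is an instance of the class above,
# already configured with `retries` and `retry_sleep`.
try:
    clusters = resource.get("clusters", params={"view": "summary"})
    LOG.debug("Fetched %d clusters", len(clusters.get("items", [])))
except ex.CMApiException:
    # Raised only by the for/else branch once every retry has timed out.
    LOG.error("GET exhausted all retries")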
Example #2
    def wait_ambari_requests(self, requests, cluster_name):
        requests = set(requests)
        failed = []
        context.sleep(20)
        while len(requests) > 0:
            completed, not_completed = set(), set()
            for req_id in requests:
                request = self.get_request_info(cluster_name, req_id)
                status = request.get("request_status")
                if status == 'COMPLETED':
                    completed.add(req_id)
                elif status in ['IN_PROGRESS', 'PENDING']:
                    not_completed.add(req_id)
                else:
                    failed.append(request)
            if failed:
                msg = _("Some Ambari request(s) "
                        "not in COMPLETED state: %(description)s.")
                descrs = []
                for req in failed:
                    descr = _(
                        "request %(id)d: %(name)s - in status %(status)s")
                    descrs.append(descr %
                                  {'id': req.get("id"),
                                   'name': req.get("request_context"),
                                   'status': req.get("request_status")})
                raise p_exc.HadoopProvisionError(msg % {'description': descrs})
            requests = not_completed
            context.sleep(5)
            LOG.debug("Waiting for %d ambari request(s) to be completed",
                      len(not_completed))
        LOG.debug("All ambari requests have been completed")
Example #3
    def _await_cldb(self, cluster_context, instances=None, timeout=600):
        instances = instances or cluster_context.get_instances()
        cldb_node = cluster_context.get_instance(mfs.CLDB)
        start_time = timeutils.utcnow()
        retry_count = 0
        with cldb_node.remote() as r:
            LOG.debug("Waiting {count} seconds for CLDB initialization".format(
                count=timeout))
            while timeutils.delta_seconds(start_time,
                                          timeutils.utcnow()) < timeout:
                ec, out = r.execute_command(NODE_LIST_CMD,
                                            raise_when_error=False)
                resp = json.loads(out)
                status = resp['status']
                if str(status).lower() == 'ok':
                    ips = [n['ip'] for n in resp['data']]
                    retry_count += 1
                    for i in instances:
                        if (i.internal_ip not in ips
                                and (retry_count > DEFAULT_RETRY_COUNT)):
                            msg = _("Node failed to connect to CLDB: %s"
                                    ) % i.internal_ip
                            raise ex.HadoopProvisionError(msg)
                    break
                else:
                    context.sleep(DELAY)
            else:
                raise ex.HadoopProvisionError(_("CLDB failed to start"))
Example #4
    def wait(self, timeout=None):
        """Wait for command to finish

        :param timeout: (Optional) Max amount of time (in seconds) to wait.
                        Wait forever by default.
        :return: The final ApiCommand object, containing the last known state.
                 The command may still be running in case of timeout.
        """
        if self.id == ApiCommand.SYNCHRONOUS_COMMAND_ID:
            return self

        SLEEP_SEC = 5

        if timeout is None:
            deadline = None
        else:
            deadline = time.time() + timeout

        while True:
            cmd = self.fetch()
            if not cmd.active:
                return cmd

            if deadline is not None:
                now = time.time()
                if deadline < now:
                    return cmd
                else:
                    context.sleep(min(SLEEP_SEC, deadline - now))
            else:
                context.sleep(SLEEP_SEC)
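A hedged caller-side sketch: `service.restart()` is an assumed API call that returns an ApiCommand; only `wait()` is taken from the example above:

cmd = service.restart()        # hypothetical call returning an ApiCommand
cmd = cmd.wait(timeout=300)    # polls every 5 s, gives up after 5 minutes
if cmd.active:
    # wait() hit the deadline; cmd holds the last known state and the
    # command may still be running server-side.
    LOG.warning("Command %s still active after timeout", cmd.id)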
Example #5
def _wait_all_processes_removed(cluster, instance):
    with _get_ambari_client(cluster) as client:
        # Poll every 5 seconds until Ambari reports no HDP processes
        # left on the host.
        while True:
            hdp_processes = client.list_host_processes(cluster.name, instance)
            if not hdp_processes:
                return
            context.sleep(5)
Example #6
    def update_configs(self, instances):
        # instances is assumed to be non-empty; index 0 supplies the
        # cluster id for the provisioning step.
        utils.add_provisioning_step(
            instances[0].cluster_id, _("Update configs"), len(instances))
        with context.PluginsThreadGroup() as tg:
            for instance in instances:
                tg.spawn("update-configs-%s" % instance.instance_name,
                         self._update_configs, instance)
                context.sleep(1)
Example #7
    def wait_ambari_request(self, request_id, cluster_name):
        context.sleep(20)
        while True:
            status = self.check_request_status(cluster_name, request_id)
            LOG.debug("Task %(context)s in %(status)s state. "
                      "Completed %(percent).1f%%",
                      {'context': status["request_context"],
                       'status': status["request_status"],
                       'percent': status["progress_percent"]})
            if status["request_status"] == "COMPLETED":
                return
            if status["request_status"] in ["IN_PROGRESS", "PENDING"]:
                context.sleep(5)
            else:
                raise p_exc.HadoopProvisionError(
                    _("Ambari request in %s state") % status["request_status"])
Example #8
def decommission_dn(nn, inst_to_be_deleted, survived_inst):
    with utils.get_remote(nn) as r:
        # List the departing DataNodes in the exclude file, then tell the
        # NameNode to re-read it.
        r.write_file_to('/etc/hadoop/dn.excl',
                        utils.generate_fqdn_host_names(inst_to_be_deleted))
        run.refresh_nodes(utils.get_remote(nn), "dfsadmin")
        context.sleep(3)

        # Poll every 3 seconds until decommissioning completes or the
        # configured timeout is reached.
        utils.plugin_option_poll(nn.cluster, _is_decommissioned,
                                 c_helper.DECOMMISSIONING_TIMEOUT,
                                 _("Decommission %s") % "DataNodes", 3, {
                                     'r': r,
                                     'inst_to_be_deleted': inst_to_be_deleted
                                 })

        # Decommissioning done: rewrite the include list to the surviving
        # nodes and clear the exclude list.
        r.write_files_to({
            '/etc/hadoop/dn.incl':
            utils.generate_fqdn_host_names(survived_inst),
            '/etc/hadoop/dn.excl':
            ""
        })
Example #9
def await_no_heartbeat():
    delay = WAIT_NODE_ALARM_NO_HEARTBEAT
    LOG.debug('Waiting for "NO_HEARTBEAT" alarm')
    context.sleep(delay)
Example #10
    def _rebuild(self, cluster_context, instances):
        OOZIE.stop(filter(OOZIE.is_started, instances))
        g.execute_on_instances(
            instances, self._rebuild_oozie_war, cluster_context)
        OOZIE.start(instances)
        context.sleep(OOZIE_START_DELAY)
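
Every example above is a variation on the same sleep-poll shape: check a condition, sleep a fixed interval, stop at a deadline. A minimal, framework-free sketch of that pattern (plain `time.sleep` standing in for `context.sleep`; names are illustrative):

import time


def poll(predicate, timeout, interval=5):
    """Call predicate() every `interval` seconds until it returns a truthy
    value or `timeout` seconds have elapsed; return the last result."""
    deadline = time.time() + timeout
    while True:
        result = predicate()
        if result:
            return result
        if time.time() + interval > deadline:
            return result
        time.sleep(interval)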