def _test_yum_rpm_sanity(self, agent_ssh, auth_args, address):
    """Check whether the host at `address` is in a state where packages can be updated.

    A small Python script is run on the remote host through `try_ssh_cmd`.
    The script exits with 0 when either:

    * at least as many installed packages match the names in
      ``ServerProfile().base_packages`` as there are names in that list, or
    * the ``ElectricFence`` package is present in yum's package sack, which is
      used as evidence that yum can get (or has previously got) OS repo
      metadata.

    :param agent_ssh: passed through unchanged to `try_ssh_cmd`
    :param auth_args: passed through unchanged to `try_ssh_cmd`
    :param address: address of the host being checked; used in log messages only
    :return: True if the remote check exited with 0, otherwise False. False is
             also returned when invoking the agent raised AgentException.
    """
    from chroma_core.services.job_scheduler.agent_rpc import AgentException
    from chroma_core.models import ServerProfile

    # The script is handed to `python -c` on the remote host, so its lines must
    # start in column 0 regardless of how this method is indented. Building it
    # from explicit lines keeps that independent of source formatting.
    remote_script = "\n".join([
        "from yum import YumBase",
        "yb = YumBase()",
        "baselist = %s",
        "if len([x for x in yb.doPackageLists(pkgnarrow='installed', patterns=baselist)]) >= len(baselist):",
        "    exit(0)",
        "missing_electric_fence = not [p.name for p in yb.pkgSack.returnNewestByNameArch() if p.name == 'ElectricFence']",
        # exit(True) yields exit status 1, exit(False) yields exit status 0
        "exit(missing_electric_fence)",
    ]) % list(ServerProfile().base_packages)
    check_yum = '\npython -c "%s"\n' % remote_script

    try:
        rc, _, _ = try_ssh_cmd(agent_ssh, auth_args, check_yum)
    except AgentException:
        job_log.exception(
            "Exception thrown while trying to invoke agent on '%s':" % address)
        # Return a plain bool here: a (False, False) tuple is truthy and would be
        # read as "can update" by any caller that tests the result.
        return False

    if rc != 0:
        job_log.error(
            "Failed configuration check on '%s': Unable to access any yum mirrors" % address)
        return False

    return True
def _test_reverse_ping(self, agent_ssh, auth_args, address, manager_hostname):
    """Check that the host at `address` can resolve and ping the manager.

    Runs ``ping -c 1 <manager_hostname>`` on the remote host through
    `try_ssh_cmd` and interprets the exit status.

    :param agent_ssh: passed through unchanged to `try_ssh_cmd`
    :param auth_args: passed through unchanged to `try_ssh_cmd`
    :param address: address of the host being checked; used in log messages only
    :param manager_hostname: hostname the remote host should be able to reach
    :return: 2-tuple of booleans ``(can_resolve, can_ping)``:
             ``(True, True)`` for exit status 0, ``(True, False)`` for exit
             status 1, and ``(False, False)`` for any other exit status or
             when invoking the agent raised AgentException.
    """
    from chroma_core.services.job_scheduler.agent_rpc import AgentException

    ping_command = "ping -c 1 %s" % manager_hostname

    try:
        # Test resolution/ping from server back to manager
        exit_status, _stdout, _stderr = try_ssh_cmd(agent_ssh, auth_args, ping_command)
    except AgentException:
        job_log.exception(
            "Exception thrown while trying to invoke agent on '%s':" % address)
        return False, False

    if exit_status == 0:
        # Can resolve, can ping
        return True, True

    if exit_status == 1:
        # Can resolve, cannot ping
        failure_format = "Failed configuration check on '%s': Can't ping %s"
        outcome = (True, False)
    else:
        # Cannot resolve, cannot ping
        failure_format = "Failed configuration check on '%s': Can't resolve %s"
        outcome = (False, False)

    job_log.error(failure_format % (address, manager_hostname))
    return outcome
def _test_yum_sanity(self, agent_ssh, auth_args, address):
    """Check whether yum on the host at `address` can see OS repository metadata.

    A small Python script is run on the remote host through
    `self._try_ssh_cmd`. It exits with 0 when the ``ElectricFence`` package is
    present in yum's package sack, which is used as evidence that yum can get
    (or has previously got) OS repo metadata.

    :param agent_ssh: passed through unchanged to `self._try_ssh_cmd`
    :param auth_args: passed through unchanged to `self._try_ssh_cmd`
    :param address: address of the host being checked; used in log messages only
    :return: True if the remote check exited with 0, otherwise False. False is
             also returned when invoking the agent raised AgentException.
    """
    from chroma_core.services.job_scheduler.agent_rpc import AgentException

    # The script is handed to `python -c` on the remote host, so its lines must
    # start in column 0 regardless of how this method is indented. Building it
    # from explicit lines keeps that independent of source formatting.
    remote_script = "\n".join([
        "from yum import YumBase",
        "yb = YumBase()",
        "missing_electric_fence = not [p.name for p in yb.pkgSack.returnNewestByNameArch() if p.name == 'ElectricFence']",
        # exit(True) yields exit status 1, exit(False) yields exit status 0
        "exit(missing_electric_fence)",
    ])
    check_yum = '\npython -c "%s"\n' % remote_script

    try:
        rc, _, _ = self._try_ssh_cmd(agent_ssh, auth_args, check_yum)
    except AgentException:
        job_log.exception(
            "Exception thrown while trying to invoke agent on '%s':" % address)
        # Return a plain bool here: a (False, False) tuple is truthy and would be
        # read as "can update" by any caller that tests the result.
        return False

    if rc != 0:
        job_log.error(
            "Failed configuration check on '%s': Unable to access any yum mirrors" % address)
        return False

    return True
def _test_hostname(self, agent_ssh, auth_args, address, resolved_address):
    """Check that the remote host's own hostname and fqdn are sanely configured.

    The checks run in order and stop at the first failure:

    1. On the remote host, the system hostname must resolve, and must not
       resolve to an address starting with ``127``.
    2. The remote host must be able to report its fqdn.
    3. The self-reported fqdn must be resolvable from the machine running this
       code (``socket.gethostbyname`` is called locally).
    4. That local resolution must equal `resolved_address`.

    :param agent_ssh: passed through unchanged to `self._try_ssh_cmd`
    :param auth_args: passed through unchanged to `self._try_ssh_cmd`
    :param address: address of the host being checked; used in log messages only
    :param resolved_address: the address the fqdn resolution is compared against
    :return: 3-tuple of booleans:
             ``(False, False, False)`` if check 1 or 2 fails or the agent could
             not be invoked, ``(True, False, False)`` if check 3 fails,
             ``(True, True, False)`` if check 4 fails, and
             ``(True, True, True)`` when every check passes.
    """
    from chroma_core.services.job_scheduler.agent_rpc import AgentException

    try:
        # Check that the system hostname:
        #  a) resolves
        #  b) does not resolve to a loopback address
        rc, out, err = self._try_ssh_cmd(
            agent_ssh,
            auth_args,
            "python -c 'import socket; print socket.gethostbyname(socket.gethostname())'"
        )
    except AgentException:
        job_log.exception(
            "Exception thrown while trying to invoke agent on '%s':" % address)
        # The hostname could not be verified at all, so report failure rather
        # than carrying on to the fqdn checks and possibly reporting success.
        return False, False, False

    if rc != 0:
        job_log.error(
            "Failed configuration check on '%s': hostname does not resolve (%s)" % (address, err))
        return False, False, False

    # Only interpret the output once the command is known to have succeeded.
    hostname_resolution = out.rstrip()
    if hostname_resolution.startswith('127'):
        job_log.error(
            "Failed configuration check on '%s': hostname resolves to a loopback address (%s)"
            % (address, hostname_resolution))
        return False, False, False

    try:
        rc, out, err = self._try_ssh_cmd(
            agent_ssh, auth_args, "python -c 'import socket; print socket.getfqdn()'")
    except AgentException:
        job_log.exception(
            "Exception thrown while trying to invoke agent on '%s':" % address)
        return False, False, False

    # Explicit check instead of `assert`: assertions are stripped when Python
    # runs with -O, which would let a failed command's output be used as the fqdn.
    if rc != 0:
        job_log.error("failed to get fqdn on %s: %s" % (address, err))
        return False, False, False

    fqdn = out.rstrip()

    try:
        resolved_fqdn = socket.gethostbyname(fqdn)
    except socket.gaierror:
        job_log.error(
            "Failed configuration check on '%s': can't resolve self-reported fqdn '%s'"
            % (address, fqdn))
        return True, False, False

    if resolved_fqdn != resolved_address:
        job_log.error(
            "Failed configuration check on '%s': self-reported fqdn resolution '%s' doesn't match address resolution"
            % (address, fqdn))
        return True, True, False

    # Everything's OK (we hope!)
    return True, True, True
def set_state(cls, objects, message=None, **kwargs):
    """Ask the job scheduler to move a collection of objects to new states.

    The `objects` argument must be a collection of 2-tuples of
    (<StatefulObject instance>, state).

    Nothing is submitted unless at least one object's current state differs
    from its requested state. When it does, the full `objects` collection is
    sent to ``JobSchedulerClient.command_set_state``.

    :param objects: collection of (stateful object, target state) pairs
    :param message: description for the command; when falsy it is taken from
                    the description of the job for the final transition on the
                    first changed object's route to its target state
    :param kwargs: passed through to ``JobSchedulerClient.command_set_state``
    :return: the Command whose primary key was returned by the scheduler, or
             None if no object needed a state change
    :raises SchedulingError: when the RPC fails with a remote exception type
                             of "SchedulingError"
    :raises RpcError: re-raised for any other RPC failure
    """
    # If you ever work on this function please talk to Chris. It should not be in this class. It has nothing to
    # do with the Command class other than it makes use of a Command and should be moved to the Stateful object
    # class because think about it can only operate on stateful objects.
    from chroma_core.services.job_scheduler.job_scheduler_client import JobSchedulerClient

    # Find the first object whose state is actually being modified.
    for stateful, wanted_state in objects:
        if stateful.state != wanted_state:
            break
    else:
        # Every object is already in its requested state (or there were none).
        return None

    if not message:
        route = stateful.get_route(stateful.state, wanted_state)

        from chroma_core.services.job_scheduler.command_plan import Transition

        # Describe the command using the last hop of the route.
        final_job = Transition(stateful, route[-2], route[-1]).to_job()
        message = final_job.description()

    object_ids = [
        (ContentType.objects.get_for_model(item).natural_key(), item.id, item_state)
        for item, item_state in objects
    ]

    try:
        command_id = JobSchedulerClient.command_set_state(object_ids, message, **kwargs)
    except RpcError as e:
        job_log.error("Failed to set object state: " + traceback.format_exc())
        # FIXME: Would be better to have a generalized mechanism
        # for reconstituting remote exceptions, as this sort of thing
        # won't scale.
        if e.remote_exception_type == "SchedulingError":
            raise SchedulingError(e.description)
        raise

    return Command.objects.get(pk=command_id)