Code Example #1
    def __init__(self, local_targets):
        super(MgsTargets, self).__init__()
        self.filesystems = {}

        mgs_target = None

        for t in local_targets:
            if t["name"] == "MGS" and t['mounted']:
                mgs_target = t

        if mgs_target:
            daemon_log.info("Searching Lustre logs for filesystems")

            block_device = BlockDevice(mgs_target["type"],
                                       mgs_target["device_paths"][0])

            self.filesystems = block_device.mgs_targets(daemon_log)
Code Example #2
    def run(self):
        # Grab a reference to the thread-local state for this thread and put
        # it somewhere that other threads can see it, so that we can be signalled
        # to shut down
        self._subprocess_abort = AgentShell.thread_state.abort

        # We are now stoppable
        self._started.set()

        daemon_log.info("%s.run: %s %s %s" % (self.__class__.__name__, self.id, self.action, self.args))
        try:
            AgentShell.thread_state.enable_save()

            agent_daemon_context = AgentDaemonContext(self.manager._session._client.sessions._sessions)

            result = self.manager._session._client.action_plugins.run(self.action, agent_daemon_context, self.args)
        except CallbackAfterResponse, e:
            self.manager.respond_with_callback(self.id, e, AgentShell.thread_state.get_subprocesses())
Code Example #3
def detect_scan(target_devices=None):
    """Look for Lustre on possible devices

    Save the input devices when possible.  Then future calls will
    not need to specify the target_devices
    """

    right_now = str(datetime.now())

    if target_devices is not None:
        target_devices_time_stamped = dict(timestamp=right_now,
                                           target_devices=target_devices)
        config.update('settings', 'last_detect_scan_target_devices',
                      target_devices_time_stamped)

    try:
        # Recall the last target_devices used in this method
        settings = config.get('settings', 'last_detect_scan_target_devices')
    except KeyError:
        # This method was never called with a non-null target_devices
        # or the setting file holding the device record is not found.
        daemon_log.warn("detect_scan improperly called without target_devices "
                        "and without a previous call's target_devices to use.")

        # TODO: Consider an exception here. But, since this is a rare case, it seems reasonable to return emptiness
        # TODO: If this raised an exception, it should be a handled one in any client, and that seems too heavy
        local_targets = LocalTargets([])
        timestamp = right_now

    else:
        # Have target devices, so process them
        timestamp = settings['timestamp']
        daemon_log.info(
            "detect_scan called at %s with target_devices saved on %s" %
            (str(datetime.now()), timestamp))
        local_targets = LocalTargets(settings['target_devices'])

    # Return the discovered Lustre components on the target devices, may return emptiness.
    mgs_targets = MgsTargets(local_targets.targets)
    return {
        "target_devices_saved_timestamp": timestamp,
        "local_targets": local_targets.targets,
        "mgs_targets": mgs_targets.filesystems
    }
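
As a hedged usage sketch (not taken from the project's test suite), the caching behaviour described in the docstring could be exercised like this; the device dictionary only mirrors the keys the code above reads, and its values are invented for illustration:

# Hypothetical illustration of detect_scan()'s caching behaviour.
example_devices = [{
    "name": "MGS",                   # matched by MgsTargets above
    "mounted": True,
    "type": "linux",                 # assumed block-device type label
    "device_paths": ["/dev/sdb1"],   # assumed device path
}]

first = detect_scan(example_devices)  # saves the devices under 'last_detect_scan_target_devices'
second = detect_scan()                # reuses the saved devices and reports their timestamp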
Code Example #4
    def targets(self, uuid_name_to_target, device, log):
        try:
            self._check_module()
        except Shell.CommandExecutionError:
            log.info("zfs is not installed, skipping device %s" %
                     device['path'])
            return self.TargetsInfo([], None)

        if log:
            log.info(
                "Searching device %s of type %s, uuid %s for a Lustre filesystem"
                % (device['path'], device['type'], device['uuid']))

        zfs_properties = self.zfs_properties(False, log)

        if ('lustre:svname' not in zfs_properties) or ('lustre:flags'
                                                       not in zfs_properties):
            if log:
                log.info(
                    "Device %s did not have a Lustre property values required"
                    % device['path'])
            return self.TargetsInfo([], None)

        # For a Lustre block device, extract name and params
        # ==================================================
        name = zfs_properties['lustre:svname']
        flags = int(zfs_properties['lustre:flags'])

        params = defaultdict(list)

        for zfs_property in zfs_properties:
            if zfs_property.startswith('lustre:'):
                lustre_property = zfs_property.split(':')[1]
                params[lustre_property].extend(
                    re.split(
                        BlockDeviceZfs.
                        lustre_property_delimiters[lustre_property],
                        zfs_properties[zfs_property]))

        if name.find("ffff") != -1:
            if log:
                log.info("Device %s reported an unregistered lustre target" %
                         device['path'])
            return self.TargetsInfo([], None)

        if (flags & self.LDD_F_SV_TYPE_MGS_or_MDT
            ) == self.LDD_F_SV_TYPE_MGS_or_MDT:
            # For combined MGS/MDT volumes, synthesise an 'MGS'
            names = ["MGS", name]
        else:
            names = [name]

        return self.TargetsInfo(names, params)
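
The params-collection loop above can be summarised with a standalone sketch; the sample properties and the delimiter table below are assumptions standing in for the class's lustre_property_delimiters, not its real values:

import re
from collections import defaultdict

# Simplified, self-contained version of the 'lustre:*' property collection above.
# Property values and delimiters are hypothetical.
zfs_properties = {
    "lustre:svname": "testfs-MDT0000",
    "lustre:failover.node": "10.0.0.1@tcp:10.0.0.2@tcp",
}
delimiters = {"svname": " ", "failover.node": ":"}

params = defaultdict(list)
for prop, value in zfs_properties.items():
    if prop.startswith("lustre:"):
        key = prop.split(":")[1]
        params[key].extend(re.split(delimiters.get(key, " "), value))

# params == {'svname': ['testfs-MDT0000'],
#            'failover.node': ['10.0.0.1@tcp', '10.0.0.2@tcp']}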
Code Example #5
 def mock_get_param_raw(self, path):
     param = path.replace("/", ".")
     daemon_log.info("mock_get_params_lines: " + param)
     data = ""
     for fn in glob(param):
         with open(fn, "r") as content_file:
             data += content_file.read()
     if data:
         return data
     else:
         raise AgentShell.CommandExecutionError(
             AgentShell.RunResult(
                 2,
                 "",
                 "error: get_param: param_path '" + param +
                 "': No such file or directory",
                 0,
             ),
             ["lctl", "get_param", "-n", path],
         )
Code Example #6
    def _handle_messages(self, messages):
        daemon_log.info("HttpReader: got %s messages" % (len(messages)))
        for message in messages:
            m = Message()
            m.parse(message)
            daemon_log.info("HttpReader: %s(%s, %s)" %
                            (m.type, m.plugin_name, m.session_id))

            try:
                if m.type == "SESSION_CREATE_RESPONSE":
                    self._client.sessions.create(m.plugin_name, m.session_id)
                elif m.type == "SESSION_TERMINATE_ALL":
                    self._client.sessions.terminate_all()
                elif m.type == "SESSION_TERMINATE":
                    self._client.sessions.terminate(m.plugin_name)
                elif m.type == "DATA":
                    try:
                        session = self._client.sessions.get(
                            m.plugin_name, m.session_id)
                    except KeyError:
                        daemon_log.warning(
                            "Received a message for unknown session %s/%s" %
                            (m.plugin_name, m.session_id))
                    else:
                        # We have successfully routed the message to the plugin instance
                        # for this session
                        try:
                            session.receive_message(m.body)
                        except:
                            daemon_log.error("%s/%s raised an exception: %s" %
                                             (m.plugin_name, m.session_id,
                                              traceback.format_exc()))
                            self._client.sessions.terminate(m.plugin_name)
                else:
                    raise NotImplementedError(m.type)
            except Exception:
                backtrace = "\n".join(
                    traceback.format_exception(*(sys.exc_info())))
                daemon_log.error("Plugin exception handling data message: %s" %
                                 backtrace)
Code Example #7
    def _run(self):
        get_args = {
            "server_boot_time": self._client.boot_time.isoformat() + "Z",
            "client_start_time": self._client.start_time.isoformat() + "Z",
        }
        while not self._stopping.is_set():
            daemon_log.info("HttpReader: get")
            try:
                body = self._client.get(params=get_args)
            except HttpError:
                daemon_log.warning("HttpReader: request failed")
                # We potentially dropped TX messages if this happened, which could include
                # session control messages, so have to completely reset.
                # NB could change this to only terminate_all if an HTTP request was started: there is
                # no need to do the teardown if we didn't even get a TCP connection to the manager.
                self._client.sessions.terminate_all()

                self._stopping.wait(timeout=self.HTTP_RETRY_PERIOD)
                continue
            else:
                self._handle_messages(body["messages"])
        daemon_log.info("HttpReader: stopping")
Code Example #8
 def mock_get_param_lines(self, path, filter_f=None):
     param = path.replace("/", ".")
     daemon_log.info("mock_get_params_lines: " + param)
     flist = glob(param)
     if not flist:
         raise AgentShell.CommandExecutionError(
             AgentShell.RunResult(
                 2,
                 "",
                 "error: get_param: param_path '" + param +
                 "': No such file or directory",
                 0,
             ),
             ["lctl", "get_param", "-n", path],
         )
     for fn in flist:
         with open(fn, "r") as content_file:
             for line in content_file:
                 if filter_f:
                     if filter_f(line):
                         yield line.strip()
                 else:
                     yield line.strip()
Code Example #9
    def send(self):
        """Return True if the POST succeeds, else False"""
        messages = []
        completion_callbacks = []

        post_envelope = {
            "messages": [],
            "server_boot_time": self._client.boot_time.isoformat() + "Z",
            "client_start_time": self._client.start_time.isoformat() + "Z",
        }

        # Any message we drop will need its session killed
        kill_sessions = set()

        messages_bytes = len(json.dumps(post_envelope))
        while True:
            try:
                message = self._retry_messages.get_nowait()
                daemon_log.debug("HttpWriter got message from retry queue")
            except Queue.Empty:
                try:
                    message = self._messages.get_nowait()
                    daemon_log.debug(
                        "HttpWriter got message from primary queue")
                except Queue.Empty:
                    break

            if message.callback:
                completion_callbacks.append(message.callback)
            message_length = len(json.dumps(message.dump(self._client._fqdn)))

            if message_length > MAX_BYTES_PER_POST:
                daemon_log.warning("Oversized message %s/%s: %s" % (
                    message_length,
                    MAX_BYTES_PER_POST,
                    message.dump(self._client._fqdn),
                ))

            if messages and message_length > MAX_BYTES_PER_POST - messages_bytes:
                # This message will not fit into this POST: pop it back into the queue
                daemon_log.info(
                    "HttpWriter message %s overflowed POST %s/%s (%d "
                    "messages), enqueuing" % (
                        message.dump(self._client._fqdn),
                        message_length,
                        MAX_BYTES_PER_POST,
                        len(messages),
                    ))
                self._retry_messages.put(message)
                break

            messages.append(message)
            messages_bytes += message_length

        daemon_log.debug("HttpWriter sending %s messages" % len(messages))
        try:
            post_envelope["messages"] = [
                m.dump(self._client._fqdn) for m in messages
            ]
            self._client.post(post_envelope)
        except HttpError:
            daemon_log.warning("HttpWriter: request failed")
            # Terminate any sessions which we've just dropped messages for
            for message in messages:
                if message.type == "DATA":
                    kill_sessions.add(message.plugin_name)
            for plugin_name in kill_sessions:
                self._client.sessions.terminate(plugin_name)

            return False
        else:
            return True
        finally:
            for callback in completion_callbacks:
                callback()
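
In outline, send() performs greedy, size-capped batching against MAX_BYTES_PER_POST. A minimal sketch of that packing step follows, with a plain list standing in for the retry/primary queues, dicts standing in for Message objects, and an assumed size limit:

import json

MAX_BYTES_PER_POST = 1024 * 1024   # assumed value, for illustration only

def pack_messages(pending):
    """Greedily add messages until the next one would push the JSON body over
    the limit; return the batch plus whatever must wait for the next POST."""
    envelope_bytes = len(json.dumps({"messages": []}))
    batch, leftover = [], list(pending)
    while leftover:
        size = len(json.dumps(leftover[0]))
        if batch and size > MAX_BYTES_PER_POST - envelope_bytes:
            break                      # would overflow: retry these next time
        batch.append(leftover.pop(0))
        envelope_bytes += size
    return batch, leftover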
Code Example #10
 def create(self, plugin_name, id):
     daemon_log.info("SessionTable.create %s/%s" % (plugin_name, id))
     self._requested_at.pop(plugin_name, None)
     self._backoffs.pop(plugin_name, None)
     self._sessions[plugin_name] = Session(self._client, id, plugin_name)
Code Example #11
 def receive_message(self, body):
     daemon_log.info("Session.receive_message %s/%s" %
                     (self._plugin_name, self.id))
     self._plugin.on_message(body)
Code Example #12
 def _shutdown():
     daemon_log.info("Restarting iml-storage-server.target")
     # Use subprocess.Popen instead of try_run because we don't want to
     # wait for completion.
     subprocess.Popen(["systemctl", "restart", "iml-storage-server.target"])
Code Example #13
 def _shutdown():
     daemon_log.info("Restarting agent")
     # Use subprocess.Popen instead of try_run because we don't want to
     # wait for completion.
     subprocess.Popen(['service', 'chroma-agent', 'restart'])
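
The "don't wait for completion" point in both _shutdown variants comes down to subprocess.Popen returning as soon as the child process is spawned, whereas the blocking stdlib helpers wait for it to exit:

import subprocess

# Fire-and-forget: returns immediately, so restarting the agent's own service
# cannot hang the caller.
subprocess.Popen(['service', 'chroma-agent', 'restart'])

# By contrast, a blocking helper such as subprocess.check_call() would not
# return until the restart finished, which is exactly what these hooks avoid.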
Code Example #14
File: agent_daemon.py  Project: zaja1kun/iml-agent
                                   DevicePluginManager(), ServerProperties(),
                                   Crypto(ENV_PATH))

        def teardown_callback(*args, **kwargs):
            agent_client.stop()
            agent_client.join()
            [function() for function in agent_daemon_teardown_functions]

        signal.signal(signal.SIGINT, teardown_callback)
        signal.signal(signal.SIGTERM, teardown_callback)
        signal.signal(signal.SIGUSR1, decrease_loglevel)
        signal.signal(signal.SIGUSR2, increase_loglevel)

        # Call any agent daemon startup methods that were registered.
        [function() for function in agent_daemon_startup_functions]

        agent_client.start()
        # Waking-wait to pick up signals
        while not agent_client.stopped.is_set():
            agent_client.stopped.wait(timeout=10)

        agent_client.join()
    except Exception, e:
        backtrace = '\n'.join(traceback.format_exception(*(sys.exc_info())))
        daemon_log.error("Unhandled exception: %s" % backtrace)

    # Call any agent daemon teardown methods that were registered.
    [function() for function in agent_daemon_teardown_functions]

    daemon_log.info("Terminating")
Code Example #15
def yum_util(action, packages=[], fromrepo=None, enablerepo=None, narrow_updates=False):
    '''
    A wrapper to perform yum actions in encapsulated way.
    :param action:  clean, install, remove, update, requires etc
    :param packages: Packages to install or remove
    :param fromrepo: The repo the action should be carried out from, others are disabled.
    :param enablerepo: The repo to enable for the action, others are not disabled or enabled
    :param narrow_updates: ?
    :return: No return but throws CommandExecutionError on error.
    '''

    if fromrepo and enablerepo:
        raise ValueError("Cannot provide fromrepo and enablerepo simultaneously")

    repo_arg = []
    valid_rc_values = [0]                               # Some error values other than 0 are valid.
    tries = 2
    if fromrepo:
        repo_arg = ['--disablerepo=*'] + ['--enablerepo=%s' % r for r in fromrepo]
    elif enablerepo:
        repo_arg = ['--enablerepo=%s' % r for r in enablerepo]
    if narrow_updates and action == 'query':
        repo_arg.extend(['--upgrades'])

    if action == 'clean':
        cmd = ['dnf', 'clean', 'all'] + (repo_arg if repo_arg else ["--enablerepo=*"])
    elif action == 'install':
        cmd = ['dnf', 'install', '--allowerasing', '-y', '--exclude', 'kernel-debug'] + \
               repo_arg + list(packages)
    elif action == 'remove':
        cmd = ['dnf', 'remove', '-y'] + repo_arg + list(packages)
    elif action == 'update':
        cmd = ['dnf', 'update', '--allowerasing', '-y', '--exclude', 'kernel-debug'] + \
               repo_arg + list(packages)
    elif action == 'requires':
        cmd = ['dnf', 'repoquery', '--requires'] + repo_arg + list(packages)
    elif action == 'query':
        cmd = ['dnf', 'repoquery', '--available'] + repo_arg + list(packages)
    elif action == 'repoquery':
        cmd = ['dnf', 'repoquery', '--available'] + repo_arg + ['--queryformat=%{EPOCH} %{NAME} %{VERSION} %{RELEASE} %{ARCH}']
    elif action == 'check-update':
        cmd = ['dnf', 'repoquery', '--queryformat=%{name} %{version}-%{release}.'
               '%{arch} %{repoid}', '--upgrades'] + repo_arg + \
            list(packages)
    else:
        raise RuntimeError('Unknown yum util action %s' % action)

    # This is a poor solution for HYD-3855 but not one that carries any known cost.
    # We sometimes see intermittent failures in test, and possibly out of test, that occur
    # 1 in 50 (estimate) times. yum commands are idempotent and so trying the command three
    # times has no downside and changes the estimated chance of fail to 1 in 12500.
    for hyd_3885 in range(tries, -1, -1):
        result = AgentShell.run(cmd)

        if result.rc in valid_rc_values:
            return result.stdout
        else:
            # if we were trying to install, clean the metadata before
            # trying again
            if action == 'install':
                AgentShell.run(['dnf', 'clean', 'metadata'])
            daemon_log.info("HYD-3885 Retrying yum command '%s'" % " ".join(cmd))
            if hyd_3885 == 0:
                daemon_log.info("HYD-3885 Retry yum command failed '%s'" % " ".join(cmd))
                raise AgentShell.CommandExecutionError(result, cmd)   # Out of retries so raise for the caller..
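
A hedged usage sketch for yum_util(); the repository id and package name below are hypothetical, not ones the project defines:

# Hypothetical call: install a package with every repo except one disabled.
# On persistent failure this raises AgentShell.CommandExecutionError.
stdout = yum_util('install',
                  packages=['example-package'],   # hypothetical package name
                  fromrepo=['example-repo'])      # hypothetical repo id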
Code Example #16
 def respond_with_callback(self, id, callback_after_response, subprocesses):
     daemon_log.info("ActionRunner.respond_with_callback %s: %s" % (id, callback_after_response.result))
     self._notify(id, callback_after_response.result, None, subprocesses, callback_after_response.callback)
     with self._running_actions_lock:
         del self._running_actions[id]
Code Example #17
def main():
    parser = argparse.ArgumentParser(description="Simulated benchmarks")
    parser.add_argument('--remote_simulator',
                        required=False,
                        help="Disable built-in simulator (run it separately)",
                        default=False)
    parser.add_argument('--debug',
                        required=False,
                        help="Enable DEBUG-level logs",
                        default=False)
    parser.add_argument('--url',
                        required=False,
                        help="Manager URL",
                        default="https://localhost:8000")
    parser.add_argument('--username',
                        required=False,
                        help="REST API username",
                        default='admin')
    parser.add_argument('--password',
                        required=False,
                        help="REST API password",
                        default='lustre')
    parser.add_argument('--servers', help="server count", default=8, type=int)
    subparsers = parser.add_subparsers()

    log_ingest_parser = subparsers.add_parser("reset")
    log_ingest_parser.set_defaults(
        func=lambda args, simulator: Benchmark(args, simulator).reset())

    log_ingest_parser = subparsers.add_parser("log_ingest_rate")
    log_ingest_parser.set_defaults(func=lambda args, simulator: LogIngestRate(
        args, simulator).run_benchmark())

    server_count_limit_parser = subparsers.add_parser("server_count_limit")
    server_count_limit_parser.set_defaults(
        func=lambda args, simulator: ServerCountLimit(args, simulator
                                                      ).run_benchmark())

    server_count_limit_parser = subparsers.add_parser(
        "concurrent_registration_limit")
    server_count_limit_parser.set_defaults(
        func=lambda args, simulator: ConcurrentRegistrationLimit(
            args, simulator).run_benchmark())

    server_count_limit_parser = subparsers.add_parser("filesystem_size_limit")
    server_count_limit_parser.set_defaults(
        func=lambda args, simulator: FilesystemSizeLimit(args, simulator
                                                         ).run_benchmark())

    args = parser.parse_args()

    if args.debug:
        log.setLevel(logging.DEBUG)

    if not args.remote_simulator:
        log.info("Starting simulator...")

        # Enable logging by agent code run within simulator
        from chroma_agent.log import daemon_log
        daemon_log.setLevel(logging.DEBUG)
        handler = logging.FileHandler("chroma-agent.log")
        handler.setFormatter(
            logging.Formatter('[%(asctime)s] %(message)s',
                              '%d/%b/%Y:%H:%M:%S'))
        daemon_log.addHandler(handler)
        daemon_log.info("Enabled agent logging within simulator")

        from cluster_sim.simulator import ClusterSimulator
        simulator = ClusterSimulator(folder=None, url=args.url + "/")
        simulator.power.setup(1)
        simulator.start_all()
        simulator.setup(0,
                        0,
                        0,
                        nid_count=1,
                        cluster_size=4,
                        pdu_count=1,
                        su_size=0)
    else:
        simulator = xmlrpclib.ServerProxy("http://localhost:%s" %
                                          SIMULATOR_PORT,
                                          allow_none=True)

    try:
        log.info("Starting benchmark...")
        args.func(args, simulator)
    except:
        # Because we do a hard exit at the end here, explicitly log terminating
        # exceptions or they would get lost.
        log.error(traceback.format_exc())
        raise
    finally:
        # Do a hard exit to avoid dealing with lingering threads (not the cleanest, but
        # this isn't production code).
        os._exit(-1)

    log.info("Complete.")
Code Example #18
 def succeed(self, id, result, subprocesses):
     daemon_log.info("ActionRunner.succeed %s: %s" % (id, result))
     self._notify(id, result, None, subprocesses)
     with self._running_actions_lock:
         del self._running_actions[id]
Code Example #19
File: yum_utils.py  Project: whamcloud/iml-agent
def yum_util(action,
             packages=[],
             fromrepo=None,
             enablerepo=None,
             narrow_updates=False):
    """
    A wrapper to perform yum actions in encapsulated way.
    :param action:  clean, install, remove, update, requires etc
    :param packages: Packages to install or remove
    :param fromrepo: The repo the action should be carried out from, others are disabled.
    :param enablerepo: The repo to enable for the action, others are not disabled or enabled
    :param narrow_updates: ?
    :return: No return but throws CommandExecutionError on error.
    """

    if fromrepo and enablerepo:
        raise ValueError(
            "Cannot provide fromrepo and enablerepo simultaneously")

    repo_arg = []
    valid_rc_values = [0]  # Some error values other than 0 are valid.
    tries = 2
    if fromrepo:
        repo_arg = ["--disablerepo=*"
                    ] + ["--enablerepo=%s" % r for r in fromrepo]
    elif enablerepo:
        repo_arg = ["--enablerepo=%s" % r for r in enablerepo]
    if narrow_updates and action == "query":
        repo_arg.extend(["--upgrades"])

    if action == "clean":
        cmd = ["yum", "clean", "all"
               ] + (repo_arg if repo_arg else ["--enablerepo=*"])
    elif action == "install":
        cmd = (["yum", "install", "-y", "--exclude", "kernel-debug"] +
               repo_arg + list(packages))
    elif action == "remove":
        cmd = ["yum", "remove", "-y"] + repo_arg + list(packages)
    elif action == "update":
        cmd = (["yum", "update", "-y", "--exclude", "kernel-debug"] +
               repo_arg + list(packages))
    elif action == "requires":
        cmd = ["repoquery", "--requires"] + repo_arg + list(packages)
    elif action == "query":
        cmd = ["repoquery"] + repo_arg + list(packages)
    elif action == "repoquery":
        cmd = (["repoquery", "--show-duplicates"] + repo_arg + [
            "--queryformat=%{EPOCH} %{NAME} "
            "%{VERSION} %{RELEASE} %{ARCH}"
        ])
    else:
        raise RuntimeError("Unknown yum util action %s" % action)

    # This is a poor solution for HYD-3855 but not one that carries any known cost.
    # We sometimes see intermittent failures in test, and possibly out of test, that occur
    # 1 in 50 (estimate) times. yum commands are idempotent and so trying the command three
    # times has no downside and changes the estimated chance of fail to 1 in 12500.
    for hyd_3885 in range(tries, -1, -1):
        result = AgentShell.run(cmd)

        if result.rc in valid_rc_values:
            return result.stdout
        else:
            # if we were trying to install, clean the metadata before
            # trying again
            if action == "install":
                AgentShell.run(["yum", "clean", "metadata"])
            daemon_log.info("HYD-3885 Retrying yum command '%s'" %
                            " ".join(cmd))
            if hyd_3885 == 0:
                daemon_log.info("HYD-3885 Retry yum command failed '%s'" %
                                " ".join(cmd))
                raise AgentShell.CommandExecutionError(
                    result, cmd)  # Out of retries so raise for the caller..
Code Example #20
 def fail(self, id, backtrace, subprocesses):
     daemon_log.info("ActionRunner.fail %s: %s" % (id, backtrace))
     self._notify(id, None, backtrace, subprocesses)
     with self._running_actions_lock:
         del self._running_actions[id]
Code Example #21
 def fake_post(envelope):
     if len(json.dumps(envelope)) > MAX_BYTES_PER_POST:
         daemon_log.info("fake_post(): rejecting oversized message")
         raise HttpError()
Code Example #22
def yum_util(action,
             packages=[],
             fromrepo=None,
             enablerepo=None,
             narrow_updates=False):
    '''
    A wrapper to perform yum actions in encapsulated way.
    :param action:  clean, install, remove, update, requires etc
    :param packages: Packages to install or remove
    :param fromrepo: The repo the action should be carried out from, others are disabled.
    :param enablerepo: The repo to enable for the action, others are not disabled or enabled
    :param narrow_updates: ?
    :return: No return but throws CommandExecutionError on error.
    '''

    if fromrepo and enablerepo:
        raise ValueError(
            "Cannot provide fromrepo and enablerepo simultaneously")

    repo_arg = []
    valid_rc_values = [0]  # Some error values other than 0 are valid.
    if fromrepo:
        repo_arg = ['--disablerepo=*', '--enablerepo=%s' % ','.join(fromrepo)]
    elif enablerepo:
        repo_arg = ['--enablerepo=%s' % ','.join(enablerepo)]
    if narrow_updates and action == 'query':
        repo_arg.extend(['--pkgnarrow=updates', '-a'])

    if action == 'clean':
        cmd = ['yum', 'clean', 'all'
               ] + (repo_arg if repo_arg else ["--enablerepo=*"])
    elif action == 'install':
        cmd = ['yum', 'install', '-y'] + repo_arg + list(packages)
    elif action == 'remove':
        cmd = ['yum', 'remove', '-y'] + repo_arg + list(packages)
    elif action == 'update':
        cmd = ['yum', 'update', '-y'] + repo_arg + list(packages)
    elif action == 'requires':
        cmd = ['repoquery', '--requires'] + repo_arg + list(packages)
    elif action == 'query':
        cmd = ['repoquery'] + repo_arg + list(packages)
    elif action == 'repoquery':
        cmd = ['repoquery'] + repo_arg + [
            '-a', '--qf=%{EPOCH} %{NAME} %{VERSION} %{RELEASE} %{ARCH}'
        ]
    elif action == 'check-update':
        cmd = ['yum', 'check-update', '-q'] + repo_arg + list(packages)
        valid_rc_values = [
            0, 100
        ]  # check-update returns 100 if updates are available.
    else:
        raise RuntimeError('Unknown yum util action %s' % action)

    # This is a poor solution for HYD-3855 but not one that carries any known cost.
    # We sometimes see intermittent failures in test, and possibly out of test, that occur
    # 1 in 50 (estimate) times. yum commands are idempotent and so trying the command three
    # times has no downside and changes the estimated chance of fail to 1 in 12500.
    for hyd_3885 in range(2, -1, -1):
        rc, stdout, stderr = AgentShell.run_old(cmd)

        if rc in valid_rc_values:
            return stdout
        else:
            daemon_log.info("HYD-3885 Retrying yum command '%s'" %
                            " ".join(cmd))
            if hyd_3885 == 0:
                daemon_log.info("HYD-3885 Retry yum command failed '%s'" %
                                " ".join(cmd))
                raise AgentShell.CommandExecutionError(
                    AgentShell.RunResult(rc, stdout, stderr, False),
                    cmd)  # Out of retries so raise for the caller..
Code Example #23
        signal.signal(signal.SIGHUP, signal.SIG_IGN)
        context = DaemonContext(pidfile=PIDLockFile(args.pid_file))
        context.open()

        daemon_log_setup()
        console_log_setup()
        daemon_log.info("Starting in the background")
    else:
        context = None
        daemon_log_setup()
        daemon_log.addHandler(logging.StreamHandler())

        console_log_setup()

    try:
        daemon_log.info("Entering main loop")
        try:
            conf = config.get('settings', 'server')
        except (KeyError, TypeError) as e:
            daemon_log.error(
                "No configuration found (must be registered before running the agent service), "
                "details: %s" % e)
            return

        if config.profile_managed is False:
            # This is kind of terrible. The design of DevicePluginManager is
            # such that it can be called with either class methods or
            # instantiated and then called with instance methods. As such,
            # we can't pass in a list of excluded plugins to the instance
            # constructor. Well, we could, but it would only work some
            # of the time and that would be even more awful.
Code Example #24
File: agent_daemon.py  Project: whamcloud/iml-agent
def main():
    """handle unexpected exceptions"""
    parser = argparse.ArgumentParser(
        description="Integrated Manager for Lustre software Agent")

    parser.add_argument("--publish-zconf", action="store_true")
    parser.parse_args()

    signal.signal(signal.SIGHUP, signal.SIG_IGN)

    daemon_log_setup()
    console_log_setup()
    daemon_log.info("Starting")

    try:
        daemon_log.info("Entering main loop")
        try:
            url = urljoin(os.environ["IML_MANAGER_URL"], "agent/message/")
        except KeyError as e:
            daemon_log.error(
                "No configuration found (must be registered before running the agent service), "
                "details: %s" % e)
            return

        if config.profile_managed is False:
            # This is kind of terrible. The design of DevicePluginManager is
            # such that it can be called with either class methods or
            # instantiated and then called with instance methods. As such,
            # we can't pass in a list of excluded plugins to the instance
            # constructor. Well, we could, but it would only work some
            # of the time and that would be even more awful.
            import chroma_agent.plugin_manager

            chroma_agent.plugin_manager.EXCLUDED_PLUGINS += ["corosync"]

        agent_client = AgentClient(
            url,
            ActionPluginManager(),
            DevicePluginManager(),
            ServerProperties(),
            Crypto(ENV_PATH),
        )

        def teardown_callback(*args, **kwargs):
            agent_client.stop()
            agent_client.join()
            [function() for function in agent_daemon_teardown_functions]

        signal.signal(signal.SIGINT, teardown_callback)
        signal.signal(signal.SIGTERM, teardown_callback)
        signal.signal(signal.SIGUSR1, decrease_loglevel)
        signal.signal(signal.SIGUSR2, increase_loglevel)

        # Call any agent daemon startup methods that were registered.
        [function() for function in agent_daemon_startup_functions]

        agent_client.start()
        # Waking-wait to pick up signals
        while not agent_client.stopped.is_set():
            agent_client.stopped.wait(timeout=10)

        agent_client.join()
    except Exception as e:
        backtrace = "\n".join(traceback.format_exception(*(sys.exc_info())))
        daemon_log.error("Unhandled exception: %s" % backtrace)

    # Call any agent daemon teardown methods that were registered.
    [function() for function in agent_daemon_teardown_functions]

    daemon_log.info("Terminating")
Code Example #25
def read_from_store(key):
    """ Read specific key from store """
    daemon_log.info('read_from_store(): reading zfs data from %s with key: %s' % (ZFS_OBJECT_STORE_PATH, key))

    return read_store()[key]
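
A hedged usage sketch; the key name is hypothetical, and a missing key simply propagates as the KeyError raised by the dict lookup above:

# Hypothetical usage: read one section of the persisted zfs data.
try:
    zfs_data = read_from_store('pools')   # 'pools' is an assumed key name
except KeyError:
    daemon_log.info("no zfs data recorded yet for this key")
    zfs_data = {}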