예제 #1
0
class TheMachine(object):
    """
    State machine that locates the Instana host agent and announces this
    process to it.

    The Fysom machine built in ``__init__`` defines four events:
    ``lookup`` (any state -> ``found``), ``announce`` (``found`` ->
    ``announced``), ``pending`` (``announced`` -> ``wait4init``) and
    ``ready`` (``wait4init`` -> ``good2go``).  Failed lookups and announces
    are retried from a timer thread every ``RETRY_PERIOD`` seconds.
    """
    # Delay, in seconds, handed to the retry Timer in schedule_retry().
    RETRY_PERIOD = 30
    # Base name given to the timer threads created by this class.
    THREAD_NAME = "Instana Machine"

    agent = None
    fsm = None
    timer = None

    # Set once the "agent couldn't be found" warning has been logged so that
    # it is emitted a single time only.
    warnedPeriodic = False

    def __init__(self, agent):
        """
        Build the state machine and schedule the first agent lookup.

        :param agent: agent object this machine drives; it is stored on
                      ``self.agent`` and its ``start`` method is registered
                      as the ``onpending`` callback.
        """
        package_version = 'unknown'
        try:
            package_version = pkg_resources.get_distribution('instana').version
        except pkg_resources.DistributionNotFound:
            # Version is informational only; fall back to 'unknown'.
            pass

        logger.info("Stan is on the scene.  Starting Instana instrumentation version: %s", package_version)
        logger.debug("initializing fsm")

        self.agent = agent
        self.fsm = Fysom({
            "events": [
                ("lookup",   "*",            "found"),
                ("announce", "found",        "announced"),
                ("pending",  "announced",    "wait4init"),
                ("ready",    "wait4init",    "good2go")],
            "callbacks": {
                # Can add the following to debug
                # "onchangestate":  self.print_state_change,
                "onlookup":       self.lookup_agent_host,
                "onannounce":     self.announce_sensor,
                "onpending":      self.agent.start,
                "onready":        self.on_ready}})

        # Fire the first lookup from a daemon timer thread 5 seconds from now.
        self.timer = t.Timer(5, self.fsm.lookup)
        self.timer.daemon = True
        self.timer.name = self.THREAD_NAME
        self.timer.start()

    @staticmethod
    def print_state_change(e):
        """
        Debug helper: log the event, source state and destination state of a
        state machine transition together with the current pid and thread name.

        :param e: event object exposing ``event``, ``src`` and ``dst``
        """
        logger.debug('========= (%i#%s) FSM event: %s, src: %s, dst: %s ==========',
                     os.getpid(), t.current_thread().name, e.event, e.src, e.dst)

    def reset(self):
        """
        reset is called to start from scratch in a process.  It may be called on first boot or
        after a detected fork.

        Here we time a new announce cycle in the future so that any existing threads have time
        to exit before we re-create them.

        :return: void
        """
        logger.debug("State machine being reset.  Will schedule new announce cycle 6 seconds from now.")

        self.timer = t.Timer(6, self.fsm.lookup)
        self.timer.daemon = True
        self.timer.name = self.THREAD_NAME
        self.timer.start()

    def lookup_agent_host(self, e):
        """
        Try to find a listening host agent and, if found, fire ``announce``.

        The configured host/port is probed first.  If nothing listens there
        and a procfs is present, the default gateway is probed on the same
        port.  When neither answers, a retry of this method is scheduled.

        :param e: state machine event, passed through to the retry
        :return: True if an agent was found, False if a retry was scheduled
        """
        self.agent.should_threads_shutdown.clear()

        host, port = self.__get_agent_host_port()

        if self.agent.is_agent_listening(host, port):
            self.agent.host = host
            self.agent.port = port
            self.fsm.announce()
            return True

        if os.path.exists("/proc/"):
            host = get_default_gateway()
            if host and self.agent.is_agent_listening(host, port):
                self.agent.host = host
                self.agent.port = port
                self.fsm.announce()
                return True

        if self.warnedPeriodic is False:
            # Logger.warn is a deprecated alias; use warning().
            logger.warning("Instana Host Agent couldn't be found. Will retry periodically...")
            self.warnedPeriodic = True

        self.schedule_retry(self.lookup_agent_host, e, self.THREAD_NAME + ": agent_lookup")
        return False

    def announce_sensor(self, e):
        """
        Announce this process to the host agent.

        Builds a ``Discovery`` from the process command line and pid, adds
        the fd/inode of a socket connected to the agent when a procfs is
        available, and sends it via ``self.agent.announce``.  On a 200
        response with a body longer than two bytes the ``pending`` event is
        fired; otherwise a retry of this method is scheduled.

        :param e: state machine event, passed through to the retry
        :return: True if the announce was accepted, False if a retry was scheduled
        """
        logger.debug("Announcing sensor to the agent")
        pid = os.getpid()

        try:
            if os.path.isfile("/proc/self/cmdline"):
                with open("/proc/self/cmdline") as cmd:
                    cmdinfo = cmd.read()
                cmdline = cmdinfo.split('\x00')
            else:
                # Python doesn't provide a reliable method to determine what
                # the OS process command line may be.  Here we are forced to
                # rely on ps rather than adding a dependency on something like
                # psutil which requires dev packages, gcc etc...
                proc = subprocess.Popen(["ps", "-p", str(pid), "-o", "command"],
                                        stdout=subprocess.PIPE)
                out, _ = proc.communicate()
                parts = out.split(b'\n')
                # parts[0] is the header line printed by ps.
                cmdline = [parts[1].decode("utf-8")]
        except Exception:
            cmdline = sys.argv
            logger.debug("announce_sensor", exc_info=True)

        d = Discovery(pid=self.__get_real_pid(),
                      name=cmdline[0],
                      args=cmdline[1:])

        # If we're on a system with a procfs
        if os.path.exists("/proc/"):
            try:
                # Best effort: a failure here (connection refused, permission
                # denied on the fd link, ...) must not abort the announce.
                # NOTE(review): the socket is intentionally left open here;
                # presumably the agent resolves the inode while handling the
                # announce -- confirm before adding a close().
                sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                # Connect to the port discovered in lookup_agent_host rather
                # than a hard-coded default so custom agent ports work.
                sock.connect((self.agent.host, self.agent.port))
                path = "/proc/%d/fd/%d" % (pid, sock.fileno())
                d.fd = sock.fileno()
                d.inode = os.readlink(path)
            except Exception:
                logger.debug("Error generating file descriptor: ", exc_info=True)

        response = self.agent.announce(d)

        # Compare the status code by value; identity comparison of ints is
        # implementation dependent.
        if response and (response.status_code == 200) and (len(response.content) > 2):
            self.agent.set_from(response.content)
            self.fsm.pending()
            logger.debug("Announced pid: %s (true pid: %s).  Waiting for Agent Ready...", str(pid), str(self.agent.from_.pid))
            return True

        logger.debug("Cannot announce sensor. Scheduling retry.")
        self.schedule_retry(self.announce_sensor, e, self.THREAD_NAME + ": announce")
        return False

    def schedule_retry(self, fun, e, name):
        """
        Run ``fun(e)`` on a daemon timer thread ``RETRY_PERIOD`` seconds from now.

        :param fun: callable to invoke
        :param e: single argument passed to ``fun``
        :param name: name given to the timer thread
        """
        self.timer = t.Timer(self.RETRY_PERIOD, fun, [e])
        self.timer.daemon = True
        self.timer.name = name
        self.timer.start()

    def on_ready(self, _):
        """Log that the host agent is available (``onready`` callback)."""
        logger.info("Host agent available. We're in business. Announced pid: %s (true pid: %s)",
                    str(os.getpid()), str(self.agent.from_.pid))

    def __get_real_pid(self):
        """
        Attempts to determine the true process ID by querying the
        /proc/<pid>/sched file.  This works on systems with a proc filesystem.
        Otherwise default to os default.

        :return: the pid parsed from the first line of the sched file, or
                 ``os.getpid()`` when the file is missing or cannot be parsed
        """
        pid = None

        if os.path.exists("/proc/"):
            sched_file = "/proc/%d/sched" % os.getpid()

            if os.path.isfile(sched_file):
                try:
                    # Context manager so the handle is always closed.
                    with open(sched_file) as sched:
                        line = sched.readline()
                    g = re.search(r'\((\d+),', line)
                    if g is not None:
                        pid = int(g.group(1))
                    else:
                        logger.debug("parsing sched file failed")
                except Exception:
                    logger.debug("parsing sched file failed", exc_info=True)

        if pid is None:
            pid = os.getpid()

        return pid

    def __get_agent_host_port(self):
        """
        Iterates the various ways the host and port of the Instana host
        agent may be configured: default, env vars, sensor options...

        Precedence for the host: ``INSTANA_AGENT_HOST``, then the deprecated
        ``INSTANA_AGENT_IP``, then the sensor options.  ``INSTANA_AGENT_PORT``
        is only consulted when the host came from one of the two environment
        variables.

        :return: ``(host, port)`` tuple
        :raises ValueError: if ``INSTANA_AGENT_PORT`` is consulted and is not an integer
        """
        host = AGENT_DEFAULT_HOST
        port = AGENT_DEFAULT_PORT

        if "INSTANA_AGENT_HOST" in os.environ:
            host = os.environ["INSTANA_AGENT_HOST"]
            if "INSTANA_AGENT_PORT" in os.environ:
                port = int(os.environ["INSTANA_AGENT_PORT"])

        elif "INSTANA_AGENT_IP" in os.environ:
            # Deprecated: INSTANA_AGENT_IP environment variable
            # To be removed in a future version
            host = os.environ["INSTANA_AGENT_IP"]
            if "INSTANA_AGENT_PORT" in os.environ:
                port = int(os.environ["INSTANA_AGENT_PORT"])

        elif self.agent.sensor.options.agent_host != "":
            host = self.agent.sensor.options.agent_host
            if self.agent.sensor.options.agent_port != 0:
                port = self.agent.sensor.options.agent_port

        return host, port
예제 #2
0
class TheMachine(object):
    """
    State machine that locates the Instana host agent and announces this
    process to it.

    The Fysom machine built in ``__init__`` defines four events:
    ``lookup`` (any state -> ``found``), ``announce`` (``found`` ->
    ``announced``), ``pending`` (``announced`` -> ``wait4init``) and
    ``ready`` (``wait4init`` -> ``good2go``).  Failed lookups and announces
    are retried from a timer thread every ``RETRY_PERIOD`` seconds.
    """
    # Delay, in seconds, handed to the retry Timer in schedule_retry().
    RETRY_PERIOD = 30
    # Base name given to the timer threads created by this class.
    THREAD_NAME = "Instana Machine"

    agent = None
    fsm = None
    timer = None

    # Set once the "agent couldn't be found" message has been logged so that
    # it is emitted a single time only.
    warnedPeriodic = False

    def __init__(self, agent):
        """
        Build the state machine and, outside of tests, schedule the first
        agent lookup.

        :param agent: agent object this machine drives; stored on ``self.agent``
        """
        logger.debug("Initializing host agent state machine")

        self.agent = agent
        self.fsm = Fysom({
            "events": [("lookup", "*", "found"),
                       ("announce", "found", "announced"),
                       ("pending", "announced", "wait4init"),
                       ("ready", "wait4init", "good2go")],
            "callbacks": {
                # Can add the following to debug
                # "onchangestate":  self.print_state_change,
                "onlookup": self.lookup_agent_host,
                "onannounce": self.announce_sensor,
                "onpending": self.on_ready,
                "ongood2go": self.on_good2go
            }
        })

        # The first lookup runs from a daemon timer thread after 1 second.
        self.timer = threading.Timer(1, self.fsm.lookup)
        self.timer.daemon = True
        self.timer.name = self.THREAD_NAME

        # Only start the announce process when not in Test
        if "INSTANA_TEST" not in os.environ:
            self.timer.start()

    @staticmethod
    def print_state_change(e):
        """
        Debug helper: log the event, source state and destination state of a
        state machine transition together with the current pid and thread name.

        :param e: event object exposing ``event``, ``src`` and ``dst``
        """
        logger.debug(
            '========= (%i#%s) FSM event: %s, src: %s, dst: %s ==========',
            os.getpid(),
            threading.current_thread().name, e.event, e.src, e.dst)

    def reset(self):
        """
        reset is called to start from scratch in a process.  It may be called on first boot or
        after a detected fork.

        The new announce cycle is started immediately by firing the
        ``lookup`` event on the calling thread.

        :return: void
        """
        logger.debug(
            "State machine being reset.  Will start a new announce cycle.")
        self.fsm.lookup()

    def lookup_agent_host(self, e):
        """
        Try to find a listening host agent and, if found, fire ``announce``.

        The host/port from ``self.agent.options`` is probed first.  If
        nothing listens there and a procfs is present, the default gateway is
        probed on the same port and, on success, written back to the options.
        When neither answers, a retry of this method is scheduled.

        :param e: state machine event, passed through to the retry
        :return: True if an agent was found, False if a retry was scheduled
        """
        host = self.agent.options.agent_host
        port = self.agent.options.agent_port

        if self.agent.is_agent_listening(host, port):
            self.fsm.announce()
            return True

        if os.path.exists("/proc/"):
            host = get_default_gateway()
            if host and self.agent.is_agent_listening(host, port):
                self.agent.options.agent_host = host
                self.agent.options.agent_port = port
                self.fsm.announce()
                return True

        if self.warnedPeriodic is False:
            logger.info(
                "Instana Host Agent couldn't be found. Will retry periodically..."
            )
            self.warnedPeriodic = True

        self.schedule_retry(self.lookup_agent_host, e,
                            self.THREAD_NAME + ": agent_lookup")
        return False

    def announce_sensor(self, e):
        """
        Announce this process to the host agent.

        Builds a ``Discovery`` from the process command line and pid, adds
        the fd/inode of a socket connected to the agent when a procfs is
        available, and sends it via ``self.agent.announce``.  On a 200
        response with a body longer than two bytes the ``pending`` event is
        fired; otherwise a retry of this method is scheduled.

        :param e: state machine event, passed through to the retry
        :return: True if the announce was accepted, False if a retry was scheduled
        """
        logger.debug(
            "Attempting to make an announcement to the agent on %s:%d",
            self.agent.options.agent_host, self.agent.options.agent_port)
        pid = os.getpid()

        try:
            if os.path.isfile("/proc/self/cmdline"):
                with open("/proc/self/cmdline") as cmd:
                    cmdinfo = cmd.read()
                cmdline = cmdinfo.split('\x00')
            else:
                # Python doesn't provide a reliable method to determine what
                # the OS process command line may be.  Here we are forced to
                # rely on ps rather than adding a dependency on something like
                # psutil which requires dev packages, gcc etc...
                proc = subprocess.Popen(
                    ["ps", "-p", str(pid), "-o", "command"],
                    stdout=subprocess.PIPE)
                (out, _) = proc.communicate()
                parts = out.split(b'\n')
                # parts[0] is the header line printed by ps.
                cmdline = [parts[1].decode("utf-8")]
        except Exception:
            cmdline = sys.argv
            logger.debug("announce_sensor", exc_info=True)

        d = Discovery(pid=self.__get_real_pid(),
                      name=cmdline[0],
                      args=cmdline[1:])

        # If we're on a system with a procfs
        if os.path.exists("/proc/"):
            try:
                # In CentOS 7, some odd things can happen such as:
                # PermissionError: [Errno 13] Permission denied: '/proc/6/fd/8'
                # Use a try/except as a safety
                # NOTE(review): the socket is intentionally left open here;
                # presumably the agent resolves the inode while handling the
                # announce -- confirm before adding a close().
                sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                sock.connect((self.agent.options.agent_host,
                              self.agent.options.agent_port))
                path = "/proc/%d/fd/%d" % (pid, sock.fileno())
                d.fd = sock.fileno()
                d.inode = os.readlink(path)
            except Exception:
                # Exception (not a bare except) so SystemExit and
                # KeyboardInterrupt are not swallowed.
                logger.debug("Error generating file descriptor: ",
                             exc_info=True)

        response = self.agent.announce(d)

        if response and (response.status_code
                         == 200) and (len(response.content) > 2):
            self.agent.set_from(response.content)
            self.fsm.pending()
            logger.debug(
                "Announced pid: %s (true pid: %s).  Waiting for Agent Ready...",
                str(pid), str(self.agent.announce_data.pid))
            return True

        logger.debug("Cannot announce sensor. Scheduling retry.")
        self.schedule_retry(self.announce_sensor, e,
                            self.THREAD_NAME + ": announce")
        return False

    def schedule_retry(self, fun, e, name):
        """
        Run ``fun(e)`` on a daemon timer thread ``RETRY_PERIOD`` seconds from now.

        :param fun: callable to invoke
        :param e: single argument passed to ``fun``
        :param name: name given to the timer thread
        """
        self.timer = threading.Timer(self.RETRY_PERIOD, fun, [e])
        self.timer.daemon = True
        self.timer.name = name
        self.timer.start()

    def on_ready(self, _):
        """
        ``onpending`` callback: start the agent and log that the host agent
        is available.
        """
        self.agent.start()

        ns_pid = str(os.getpid())
        true_pid = str(self.agent.announce_data.pid)

        logger.info(
            "Instana host agent available. We're in business. Announced PID: %s (true pid: %s)",
            ns_pid, true_pid)

    def on_good2go(self, _):
        """
        ``ongood2go`` callback: send an "online and reporting" message to the
        host agent via ``self.agent.log_message_to_host_agent``.
        """
        ns_pid = str(os.getpid())
        true_pid = str(self.agent.announce_data.pid)

        self.agent.log_message_to_host_agent(
            "Instana Python Package %s: PID %s (true pid: %s) is now online and reporting"
            % (VERSION, ns_pid, true_pid))

    def __get_real_pid(self):
        """
        Attempts to determine the true process ID by querying the
        /proc/<pid>/sched file.  This works on systems with a proc filesystem.
        Otherwise default to os default.

        :return: the pid parsed from the first line of the sched file, or
                 ``os.getpid()`` when the file is missing or cannot be parsed
        """
        pid = None

        if os.path.exists("/proc/"):
            sched_file = "/proc/%d/sched" % os.getpid()

            if os.path.isfile(sched_file):
                try:
                    # Context manager so the handle is always closed.
                    with open(sched_file) as sched:
                        line = sched.readline()
                    g = re.search(r'\((\d+),', line)
                    if g is not None:
                        pid = int(g.group(1))
                    else:
                        logger.debug("parsing sched file failed")
                except Exception:
                    logger.debug("parsing sched file failed", exc_info=True)

        if pid is None:
            pid = os.getpid()

        return pid