Esempio n. 1
0
    def _get_swarm_stat(self):
        """Probe the swarm manager and record three status metrics.

        Appends to ``self._result``:

        * ``lain.cluster.swarm.alive``     -- 1 if ``docker version`` against
          ``swarm.lain:<port>`` exits with status 0, else 0.
        * ``lain.cluster.swarm.health``    -- 1 if ``docker info`` against the
          same host exits with status 0, else 0.
        * ``lain.cluster.swarm.leaderkey`` -- 1 unless the etcd value read
          from the swarm leader key equals the empty string.

        Each docker invocation is wrapped in ``timeout 3``.
        """
        leader_value = get_etcd_value("/lain/swarm/docker/swarm/leader")
        leader_flag = 0 if leader_value == "" else 1

        # Shared command prefix; only the docker sub-command differs.
        docker_prefix = [
            "timeout", "3", "docker", "-H",
            "swarm.lain:%d" % self._swarm_manager_port,
        ]

        alive_flag = 0
        healthy_flag = 0
        # stdout of both probes is discarded; only the exit status matters.
        with open("/dev/null", "w") as sink:
            version_rc = subprocess.call(docker_prefix + ["version"],
                                         stdout=sink)
            if version_rc == 0:
                alive_flag = 1

            info_rc = subprocess.call(docker_prefix + ["info"], stdout=sink)
            if info_rc == 0:
                healthy_flag = 1

        metrics = (
            ("lain.cluster.swarm.alive", alive_flag),
            ("lain.cluster.swarm.health", healthy_flag),
            ("lain.cluster.swarm.leaderkey", leader_flag),
        )
        for metric_name, metric_value in metrics:
            self._result.append(
                GraphiteData(metric_name, self._endpoint, metric_value,
                             self._step, "status"))
Esempio n. 2
0
 def _get_ceph_stat(self):
     '''
     Record whether the path in self._ceph_fuse is a mount point.

     Appends one "lain.cluster.cephfuse.mounted" status metric to
     self._result: 1 when os.path.ismount() reports the path as mounted,
     0 otherwise.
     '''
     mounted_flag = int(os.path.ismount(self._ceph_fuse))
     metric = GraphiteData("lain.cluster.cephfuse.mounted", self._endpoint,
                           mounted_flag, self._step, "status")
     self._result.append(metric)
Esempio n. 3
0
    def _collect_lainlet_debug_info(self):
        """Fetch lainlet debug counters and record them as metrics.

        Issues a GET to ``self._debug_url`` (1 second timeout), decodes the
        JSON body and reads its ``connections`` and ``goroutines`` entries.
        Collection is best-effort: any exception is swallowed and whichever
        counter was not read keeps its default of 0.

        Appends ``lain.cluster.lainlet.goroutines`` followed by
        ``lain.cluster.lainlet.connections`` to ``self._result``.
        """
        counters = {"connections": 0, "goroutines": 0}
        try:
            payload = requests.get(self._debug_url, timeout=1).json()
            # Assigned one at a time so a missing second key still keeps
            # the first value that was read.
            counters["connections"] = payload['connections']
            counters["goroutines"] = payload['goroutines']
        except Exception:
            # Deliberately ignored: fall back to the zero defaults.
            pass

        for metric_name, key in (
                ("lain.cluster.lainlet.goroutines", "goroutines"),
                ("lain.cluster.lainlet.connections", "connections")):
            self._result.append(
                GraphiteData(metric_name, self._endpoint, counters[key],
                             self._step, "val"))
Esempio n. 4
0
    def _get_etcd_stat(self):
        """Record whether ``etcdctl cluster-health`` reports a healthy cluster.

        Runs ``etcdctl cluster-health`` through the shell and appends one
        ``lain.cluster.etcd.alive`` status metric to ``self._result``:

        * 1 when the command exits with status 0 and its output does not
          contain the word ``unhealthy``;
        * 0 when the output mentions ``unhealthy`` or the command exits with
          a non-zero status.
        """
        is_alive = 0
        try:
            out = subprocess.check_output("etcdctl cluster-health", shell=True)
        except subprocess.CalledProcessError:
            # Non-zero exit: there is no output to inspect, so the metric
            # stays at 0. (Previously this path fell through to an unbound
            # ``out`` and raised instead of reporting 0.)
            pass
        else:
            # check_output returns a byte string (``str`` on Python 2,
            # ``bytes`` on Python 3); a bytes literal matches on both.
            if b"unhealthy" not in out:  # no member reported as unhealthy
                is_alive = 1
        self._result.append(
            GraphiteData("lain.cluster.etcd.alive", self._endpoint, is_alive,
                         self._step, "status"))
Esempio n. 5
0
    def _get_docker_devicemapper_stat(self):
        """Record devicemapper data/metadata space usage reported by docker.

        Issues a GET to ``http://docker.lain:<self._docker_port>/info``
        (5 second timeout), walks the ``DriverStatus`` list of the JSON
        reply and converts the "Data Space" / "Metadata Space" Used and
        Total entries with ``self._get_size_byte``.

        Appends two metrics to ``self._result``:

        * ``lain.cluster.docker.devicemapper.data.used``
        * ``lain.cluster.docker.devicemapper.metadata.used``

        Each value is ``used / total`` formatted with two decimals, or the
        integer 0 when the total is 0/missing or anything fails (the whole
        collection is best-effort and swallows exceptions).

        NOTE(review): the value is a 0..1 ratio although the metric unit is
        "percent" -- kept as-is for compatibility; confirm with consumers.
        """
        data_percent = 0
        meta_percent = 0
        try:
            resp = requests.get("http://docker.lain:%s/info" %
                                self._docker_port,
                                timeout=5)
            driver_status = resp.json()["DriverStatus"]
            data_used = 0
            data_total = 0
            meta_used = 0
            meta_total = 0

            # Each entry is indexed as a (label, value) pair.
            for stat in driver_status:
                label = stat[0]
                if label == "Data Space Total":
                    data_total = self._get_size_byte(stat[1])
                elif label == "Data Space Used":
                    data_used = self._get_size_byte(stat[1])
                elif label == "Metadata Space Total":
                    meta_total = self._get_size_byte(stat[1])
                elif label == "Metadata Space Used":
                    meta_used = self._get_size_byte(stat[1])

            # float() forces true division: if the sizes are ints, Python 2's
            # "/" would truncate every ratio below 1 down to 0.
            if data_total != 0:
                data_percent = "%.2f" % (float(data_used) / data_total)
            if meta_total != 0:
                meta_percent = "%.2f" % (float(meta_used) / meta_total)
        except Exception:
            # Best-effort: on any failure report whatever was computed so
            # far (0 by default).
            pass

        self._result.append(
            GraphiteData("lain.cluster.docker.devicemapper.data.used",
                         self._endpoint, data_percent, self._step, "percent"))
        self._result.append(
            GraphiteData("lain.cluster.docker.devicemapper.metadata.used",
                         self._endpoint, meta_percent, self._step, "percent"))
Esempio n. 6
0
    def _collect_deployd_debug_info(self):
        """Record whether deployd answers on its debug endpoint.

        Issues a GET to ``http://deployd.lain:<self._deployd_port>/debug/vars``
        (1 second timeout) and appends one ``lain.cluster.deployd.alive``
        status metric to ``self._result``: 1 when the response status is
        200, 0 for any other status or any exception.
        """
        is_alive = 0
        try:
            # The former etcd "/lain/config/domain" lookup was dropped: its
            # result was never used, yet a failing lookup skipped the probe
            # and reported deployd as down.
            resp = requests.get("http://deployd.lain:%d/debug/vars" %
                                self._deployd_port,
                                timeout=1)
            if resp.status_code == 200:
                is_alive = 1
        except Exception:
            # Best-effort probe: any failure means "not alive".
            pass

        self._result.append(
            GraphiteData("lain.cluster.deployd.alive", self._endpoint,
                         is_alive, self._step, "status"))
Esempio n. 7
0
 def _get_cali_veth_stat(self):
     '''
     Count network interfaces by name prefix and link state.

     Interfaces whose name starts with "cali" or "tmp" (as listed by
     psutil.net_if_stats()) are tallied into up / down / total counters
     according to their isup flag; all other interfaces are ignored.

     Appends six "val" metrics to self._result, in this order:
     lain.cluster.calico.veth.cali.{up,down,total} followed by
     lain.cluster.calico.veth.tmp.{up,down,total}.
     '''
     prefixes = ('cali', 'tmp')
     states = ('up', 'down', 'total')
     counts = dict(
         (prefix, dict((state, 0) for state in states))
         for prefix in prefixes)

     # .items() rather than .iteritems(): the latter does not exist on
     # Python 3 dicts, while .items() works on both Python 2 and 3.
     for name, stat in psutil.net_if_stats().items():
         for prefix in prefixes:
             if name.startswith(prefix):
                 bucket = counts[prefix]
                 bucket['total'] += 1
                 bucket['up' if stat.isup else 'down'] += 1
                 # A name is counted under the first matching prefix only.
                 break

     for prefix in prefixes:
         for state in states:
             self._result.append(
                 GraphiteData(
                     "lain.cluster.calico.veth.%s.%s" % (prefix, state),
                     self._endpoint, counts[prefix][state], self._step,
                     "val"))