def getCommonTestMessages(self):
    """Return the messages shared by the publish test cases, in random order.

    Three messages are built: one with empty meta, one with a ``user`` meta
    field, and one whose meta carries ``vsn``/``job``/``task`` values. Each
    message is stamped with the current time in nanoseconds.
    """
    # (name, value, meta) for every message; rebuilt on each call so that the
    # meta dicts are never shared between calls
    specs = (
        ("test", 1234, {}),
        ("e", 2.71828, {"user": "******"}),
        (
            "replace.app.meta.with.sys.meta",
            "should replace meta with app and sys meta",
            {"vsn": "Z123", "job": "sure", "task": "ok"},
        ),
    )
    messages = [
        wagglemsg.Message(name=name, value=value, timestamp=time.time_ns(), meta=meta)
        for name, value, meta in specs
    ]
    # randomize the order in place before handing the list out
    shuffle(messages)
    return messages
Beispiel #2
0
def add_system_metrics_gps(args, messages):
    """Add GPS system metrics

    Reads reports from gpsd until both a TPV report and a SKY report with a
    non-empty satellite list have been handled. Every position field present in
    the TPV report becomes a ``sys.gps.<field>`` message and the number of
    satellites flagged as used becomes ``sys.gps.satellites``.

    Hosts whose id does not contain "nxcore" are skipped. Any failure is logged
    and swallowed, so messages appended before the failure are kept.

    Args:
        args: all program arguments (reads waggle_host_id, gpsd_host, gpsd_port)
        messages: the message queue to append metric to
    """
    timestamp = time.time_ns()

    logging.info("collecting system metrics (GPS)")

    if "nxcore" not in args.waggle_host_id:
        logging.warning("skipping GPS publish for non-main host (%s)", args.waggle_host_id)
        return

    try:
        tpv_report = False
        sat_report = False
        gpsclient = GPSDClient(host=args.gpsd_host, port=args.gpsd_port)
        for result in gpsclient.dict_stream(convert_datetime=False):
            # use .get so a report without the field is ignored instead of
            # raising KeyError and aborting the whole collection
            report_class = result.get("class")

            # look for a GPS report
            if not tpv_report and report_class == "TPV":
                tpv_report = True
                for vkey in ("lat", "lon", "alt", "epx", "epy", "epv", "mode"):
                    value = result.get(vkey)
                    # only a missing field is skipped: 0 / 0.0 is a legitimate
                    # reading (e.g. a latitude on the equator)
                    if value is None:
                        logging.info("gps (%s) not found. skipping...", vkey)
                        continue
                    messages.append(
                        message.Message(
                            name="sys.gps.{name}".format(name=vkey),
                            value=float(value),
                            timestamp=timestamp,
                            meta={},
                        )
                    )

            # report satellite info
            satellites = result.get("satellites")
            if not sat_report and report_class == "SKY" and satellites:
                sat_report = True
                # count the satellites that are being used
                used_sats = sum(1 for sat in satellites if sat.get("used"))
                messages.append(
                    message.Message(
                        name="sys.gps.satellites",
                        value=int(used_sats),
                        timestamp=timestamp,
                        meta={},
                    )
                )

            if sat_report and tpv_report:
                break

    except Exception:
        logging.exception("failed to get gps system metrics")
Beispiel #3
0
def convert_to_upload_message(msg: wagglemsg.Message, upload_publish_name: str) -> wagglemsg.Message:
    """Build the message published in place of an upload message.

    The result keeps the timestamp of ``msg``, is named
    ``upload_publish_name`` and carries the result of
    ``upload_url_for_message(msg)`` as its value.

    Args:
        msg: the original upload message
        upload_publish_name: name to give the returned message

    Returns:
        A new message; its meta is a shallow copy of ``msg.meta``.
    """
    return wagglemsg.Message(
        timestamp=msg.timestamp,
        name=upload_publish_name,
        # copy instead of sharing the dict, so mutating the meta of one message
        # can never leak into the other
        meta=dict(msg.meta),
        value=upload_url_for_message(msg),
    )
    def getPublishTestCases(self):
        """Build an app publish scenario.

        Registers randomly generated app meta under a fresh app uid and returns
        ``(app_uid, messages, want_messages)`` where ``want_messages`` are the
        common test messages with the app and system meta merged in.
        """
        # TODO(sean) should we fuzz test this to try lots of different arguments
        messages = self.getCommonTestMessages()

        app_uid = str(uuid4())
        app_meta = dict(
            job=f"sage-{randint(1, 1000000)}",
            task=f"testing-{randint(1, 1000000)}",
            host=f"{randint(1, 1000000)}.ws-nxcore",
            plugin=f"plugin-test:{randtag()}",
            vsn="should be replaced",
        )
        self.updateAppMetaCache(app_uid, app_meta)

        # we expect the same messages, but with the app and sys meta tagged
        want_messages = []
        for msg in messages:
            # NOTE(sean) the order of meta is important. we should expect:
            # 1. sys meta overrides msg meta and app meta
            # 2. app meta overrides msg meta
            tagged_meta = {}
            tagged_meta.update(msg.meta)
            tagged_meta.update(app_meta)
            tagged_meta.update(self.service.system_meta)
            want_messages.append(
                wagglemsg.Message(
                    name=msg.name,
                    value=msg.value,
                    timestamp=msg.timestamp,
                    meta=tagged_meta,
                )
            )

        return app_uid, messages, want_messages
Beispiel #5
0
def add_system_metrics(args, messages):
    """Collect node exporter metrics plus the hardware specific metrics.

    Fetches the metrics text from ``args.metrics_url``, appends one message for
    every sample whose name has an entry in ``prom2waggle``, and then runs the
    tegra, jetson_clocks, NVMe and GPS collectors on the same queue.

    Args:
        args: all program arguments
        messages: the message queue to append metric to
    """
    timestamp = time.time_ns()

    logging.info("collecting system metrics from %s", args.metrics_url)
    exporter_text = get_node_exporter_metrics(args.metrics_url)

    # walk every sample of every metric family, lazily
    samples = (
        sample
        for family in text_string_to_metric_families(exporter_text)
        for sample in family.samples
    )
    for sample in samples:
        try:
            waggle_name = prom2waggle[sample.name]
        except KeyError:
            # no waggle name is mapped to this sample, so it is not published
            continue

        metric = message.Message(
            name=waggle_name,
            value=sample.value,
            timestamp=timestamp,
            meta=sample.labels,
        )
        messages.append(metric)

    collectors = (
        add_system_metrics_tegra,
        add_system_metrics_jetson_clocks,
        add_system_metrics_nvme,
        add_system_metrics_gps,
    )
    for collect in collectors:
        collect(args, messages)
Beispiel #6
0
def add_system_metrics_nvme(args, messages):
    """Add system metrics for an optional NVMe drive (/dev/nvme0)

    When the device node exists, its temperature is appended as a
    ``sys.thermal`` message; otherwise the drive is skipped. Failures are
    logged and never propagate to the caller.

    Args:
        args: all program arguments
        messages: the message queue to append metric to
    """
    timestamp = time.time_ns()

    logging.info("collecting system metrics (NVMe)")

    nvmeroot = "/dev/nvme0"
    thermal_meta = {"type": "nvme-therm", "zone": "none"}
    try:
        # the drive is optional: nothing to report without the device node
        if not Path(nvmeroot).exists():
            logging.info("nvme (%s) not found. skipping...", nvmeroot)
            return

        nvmedev = Device(nvmeroot)
        temperature = float(nvmedev.temperature)
        messages.append(
            message.Message(
                name="sys.thermal",
                value=temperature,
                timestamp=timestamp,
                meta=thermal_meta,
            )
        )
    except Exception:
        logging.exception("failed to get nvme system metrics")
    def testPublishUpload(self):
        """Regression test: uploading a file publishes one upload message.

        Uploads a small text file through a plugin and checks the message seen
        on "to-beehive" (value, timestamp and merged meta) as well as the
        message counters.
        """
        # TODO(sean) clean up! added as a regression test for now.
        app_uid = str(uuid4())

        tag = randtag()

        app_meta = dict(
            job=f"sage-{randint(1, 1000000)}",
            task=f"testing-{randint(1, 1000000)}",
            host=f"{randint(1, 1000000)}.ws-nxcore",
            plugin=f"plugin-test:{tag}",
            vsn="should be replaced",
        )
        self.updateAppMetaCache(app_uid, app_meta)

        timestamp = time.time_ns()
        filename = "hello.txt"

        # the upload happens while the temporary file still exists
        with TemporaryDirectory() as tmpdir:
            upload_path = Path(tmpdir, filename)
            upload_path.write_text("hello")
            with get_plugin(app_uid) as plugin:
                plugin.upload_file(upload_path, meta={"user": "******"}, timestamp=timestamp)

        job = app_meta["job"]
        task = app_meta["task"]
        node = self.service.system_meta["node"]

        want_upload = wagglemsg.Message(
            name="upload",
            value=f"https://storage.sagecontinuum.org/api/v1/data/{job}/sage-{task}-{tag}/{node}/{timestamp}-{filename}",
            timestamp=timestamp,
            meta={
                "user": "******",
                "filename": "hello.txt",
                **app_meta,
                **self.service.system_meta,
            },
        )
        self.assertMessages("to-beehive", [want_upload])

        want_metrics = {
            "wes_data_service_messages_total": 1,
            "wes_data_service_messages_rejected_total": 0,
            "wes_data_service_messages_published_node_total": 1,
            "wes_data_service_messages_published_beehive_total": 1,
        }
        self.assertMetrics(want_metrics)
def main(args):
    """Publish a single message built from the command line arguments.

    The value is converted according to ``args.type``, the ``key=value`` items
    of ``args.meta`` become the message meta (with "node" and "vsn" always
    forced to fixed values), and the message is published on the "data.topic"
    exchange with ``args.topic`` as routing key.

    Args:
        args: parsed arguments; reads type, value, meta, topic, rabbitmq_host
            and rabbitmq_port

    Raises:
        Exception: if ``args.type`` is not a known type or a meta item is not
            of the form ``key=value``.
    """
    if args.type in ["int", "i"]:
        v = int(args.value)
    elif args.type in ["float", "f"]:
        v = float(args.value)
    elif args.type in ["string", "str"]:
        v = str(args.value)
    else:
        raise Exception(f'Wrong type detected: {args.type}')

    meta = {}
    # tolerate an absent meta option (None) the same way as an empty list
    for m in args.meta or []:
        # split on the first "=" only, so values may themselves contain "="
        key, sep, val = m.partition('=')
        if not sep:
            raise Exception(f'Failed to parser meta {args.meta}: missing "=" in {m!r}')
        meta[key] = val
    meta.update({
        "node": "plugin",
        "vsn": "W000",
    })
    msg = wagglemsg.Message(
        name=args.topic,
        value=v,
        timestamp=get_timestamp(),
        meta=meta,
    )

    params = pika.ConnectionParameters(host=args.rabbitmq_host,
                                       port=args.rabbitmq_port,
                                       credentials=pika.PlainCredentials(
                                           "plugin", "plugin"),
                                       retry_delay=60,
                                       socket_timeout=10.0)

    conn = pika.BlockingConnection(params)
    try:
        ch = conn.channel()

        ch.basic_publish("data.topic",
                         args.topic,
                         wagglemsg.dump(msg),
                         properties=pika.BasicProperties(
                             delivery_mode=2,
                             user_id="plugin",
                         ))
        print("message published")
        ch.close()
    finally:
        # release the connection even when opening the channel or publishing
        # failed; a connection the broker already dropped must not be closed
        # again
        if conn.is_open:
            conn.close()
Beispiel #9
0
def add_uptime_metrics(args, messages):
    """Append the host uptime as a ``sys.uptime`` message.

    Failures are logged and never propagate; in that case nothing is appended.

    Args:
        args: all program arguments
        messages: the message queue to append metric to
    """
    logging.info("collecting uptime metrics")
    timestamp = time.time_ns()
    try:
        uptime_msg = message.Message(
            name="sys.uptime",
            value=get_uptime_seconds(),
            timestamp=timestamp,
            meta={},
        )
        messages.append(uptime_msg)
    except FileNotFoundError:
        logging.warning("could not access /host/proc/uptime")
    except Exception:
        logging.exception("failed to get uptime")
    def getSystemPublishTestCases(self):
        """Build a system publish scenario.

        Returns ``(messages, want_messages)`` where ``want_messages`` are the
        common test messages with only the system meta merged in.
        """
        messages = self.getCommonTestMessages()

        # we expect the same messages, but for system publishers we only want sys meta tagged
        want_messages = []
        for msg in messages:
            # NOTE(sean) the order of meta is important. we should expect:
            # 1. sys meta overrides msg meta
            tagged_meta = {}
            tagged_meta.update(msg.meta)
            tagged_meta.update(self.service.system_meta)
            want_messages.append(
                wagglemsg.Message(
                    name=msg.name,
                    value=msg.value,
                    timestamp=msg.timestamp,
                    meta=tagged_meta,
                )
            )

        return messages, want_messages
Beispiel #11
0
def add_version_metrics(args, messages):
    """Append the OS version as a ``sys.version.os`` message.

    The version is the stripped content of /host/etc/waggle_version_os. A
    missing file is skipped quietly; any other failure is logged with its
    traceback. Nothing propagates to the caller.

    Args:
        args: all program arguments
        messages: the message queue to append metric to
    """
    logging.info("collecting version metrics")
    timestamp = time.time_ns()

    version_file = Path("/host/etc/waggle_version_os")
    try:
        os_version = version_file.read_text().strip()
        version_msg = message.Message(
            name="sys.version.os",
            value=os_version,
            timestamp=timestamp,
            meta={},
        )
        messages.append(version_msg)
        logging.info("added os version")
    except FileNotFoundError:
        logging.info("os version not found. skipping...")
    except Exception:
        logging.exception("failed to get os version")
Beispiel #12
0
def add_provision_metrics(args, messages):
    """Append the factory provisioning date as ``sys.provision.factory_date``.

    Reads /host/etc/waggle/factory_provision and only reports a date when the
    last line of the log contains "Factory Provisioning Finish"; the date is
    the part of that line before its last ":". A missing file is skipped
    quietly; any other failure is logged with its traceback.

    Args:
        args: all program arguments
        messages: the message queue to append metric to
    """
    logging.info("collecting system provision metrics")
    timestamp = time.time_ns()
    try:
        # check the last line is a complete factory provision log
        # ([-1] also works for a log that consists of a single line, where
        # there is no newline to split on)
        lastline = Path("/host/etc/waggle/factory_provision").read_text().strip().rsplit("\n", 1)[-1]
        if "Factory Provisioning Finish" in lastline:
            date = lastline.rsplit(":", 1)[0]
            messages.append(
                message.Message(
                    name="sys.provision.factory_date",
                    value=date,
                    timestamp=timestamp,
                    meta={},
                )
            )
            # only claim success when a message was really appended
            logging.info("added factory provision date")
        else:
            logging.info("factory provision log is not complete, skipping...")
    except FileNotFoundError:
        logging.info("factory provision not found, skipping...")
    except Exception:
        logging.exception("failed to get factory provision")
Beispiel #13
0
def main():
    """Publish generated test messages to RabbitMQ once per second, forever.

    Connection settings come from the command line, each defaulting to the
    matching RABBITMQ_* environment variable. Username/password credentials
    are used when a username is given, external credentials otherwise; TLS is
    enabled when a CA certificate file is given.

    Every cycle publishes one "env.temperature.gen" message and three
    "sys.uptime" messages on behalf of three fixed node ids to the exchange
    named by --rabbitmq_exchange.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--rabbitmq_host",
                        default=getenv("RABBITMQ_HOST", "localhost"))
    parser.add_argument("--rabbitmq_port",
                        default=getenv("RABBITMQ_PORT", "5672"),
                        type=int)
    parser.add_argument("--rabbitmq_username",
                        default=getenv("RABBITMQ_USERNAME", ""))
    parser.add_argument("--rabbitmq_password",
                        default=getenv("RABBITMQ_PASSWORD", ""))
    parser.add_argument("--rabbitmq_cacertfile",
                        default=getenv("RABBITMQ_CACERTFILE", ""))
    parser.add_argument("--rabbitmq_certfile",
                        default=getenv("RABBITMQ_CERTFILE", ""))
    parser.add_argument("--rabbitmq_keyfile",
                        default=getenv("RABBITMQ_KEYFILE", ""))
    parser.add_argument("--rabbitmq_exchange",
                        default=getenv("RABBITMQ_EXCHANGE", "waggle.msg"))
    args = parser.parse_args()

    if args.rabbitmq_username != "":
        credentials = pika.PlainCredentials(args.rabbitmq_username,
                                            args.rabbitmq_password)
    else:
        credentials = pika.credentials.ExternalCredentials()

    if args.rabbitmq_cacertfile != "":
        context = ssl.create_default_context(cafile=args.rabbitmq_cacertfile)
        # HACK this allows the host and baked in host to be configured independently
        context.check_hostname = False
        if args.rabbitmq_certfile != "":
            context.load_cert_chain(args.rabbitmq_certfile,
                                    args.rabbitmq_keyfile)
        ssl_options = pika.SSLOptions(context, args.rabbitmq_host)
    else:
        ssl_options = None

    params = pika.ConnectionParameters(host=args.rabbitmq_host,
                                       port=args.rabbitmq_port,
                                       credentials=credentials,
                                       ssl_options=ssl_options,
                                       retry_delay=60,
                                       socket_timeout=10.0)

    conn = pika.BlockingConnection(params)
    ch = conn.channel()

    def publish(name, value, node, plugin):
        # build one message for `node` and publish it as user "node-<node>"
        msg = message.Message(name=name,
                              timestamp=time.time_ns(),
                              value=value,
                              meta={
                                  "node": node,
                                  "plugin": plugin
                              })
        # honor --rabbitmq_exchange (its default is the previously hard-coded
        # "waggle.msg")
        ch.basic_publish(args.rabbitmq_exchange,
                         routing_key="",
                         properties=pika.BasicProperties(user_id=f"node-{node}"),
                         body=message.dump(msg))

    try:
        while True:
            publish("env.temperature.gen", random.uniform(0.0, 5.0),
                    "0000000000000001", "metsense:1.0.2")
            publish("sys.uptime", time.time(),
                    "0000000000000001", "status:1.0.0")
            publish("sys.uptime", time.time() + 1.4,
                    "0000000000000002", "status:1.0.0")
            publish("sys.uptime", time.time() + 2.3,
                    "0000000000000003", "status:1.0.0")

            time.sleep(1)
    finally:
        # the loop only ends through an exception (e.g. KeyboardInterrupt or a
        # lost connection); release the connection if it is still open
        if conn.is_open:
            conn.close()
Beispiel #14
0
def add_system_metrics_tegra(args, messages):
    """Add system metrics gathered by the `tegrastats` subprocess

    Starts `tegrastats`, waits up to 10 seconds for its first output line and
    parses that line into CPU frequency percentages, other frequency
    percentages (GPU, EMC, ...), the APE frequency and power readings.
    Failures are logged and never propagate to the caller.

    Args:
        args: all program arguments
        messages: the message queue to append metric to
    """
    timestamp = time.time_ns()

    logging.info("collecting system metrics (tegra)")

    tegradata = None
    try:
        with subprocess.Popen(["tegrastats"], stdout=subprocess.PIPE) as process:
            try:
                # wait for 10 seconds to get tegrastats info
                pollresults = select([process.stdout], [], [], 10)[0]
                if pollresults:
                    output = pollresults[0].readline()
                    if output:
                        tegradata = output.strip().decode()
            finally:
                # Leaving the `with` block waits for the child to exit.
                # tegrastats is expected to keep running until stopped, so
                # stop it explicitly instead of depending on it to end by
                # itself once its stdout is closed.
                try:
                    process.kill()
                except ProcessLookupError:
                    # the process is already gone; nothing left to stop
                    pass

        if tegradata:
            # populate CPU frequency percentages
            ## ex. CPU [25%@652,15%@806,16%@880,31%@902,19%@960,38%@960]
            CPU_RE = re.compile(r"CPU \[(.*?)\]")
            cpudata = CPU_RE.search(tegradata)
            if cpudata:
                for idx, cpu_str in enumerate(cpudata.group(1).split(",")):
                    # cores reported as "off" have no frequency to publish
                    if "off" == cpu_str:
                        continue

                    messages.append(
                        message.Message(
                            name="sys.freq.cpu_perc",
                            value=__val_freq(cpu_str)["perc"],
                            timestamp=timestamp,
                            meta={"cpu": str(idx)},
                        )
                    )

            # populate the GPU, EMC (external memory controller),
            #  APE (audio processing engine), etc. frequency percentages
            ## ex. EMC_FREQ 1%@1600 GR3D_FREQ 0%@114 APE 150
            VALS_RE = re.compile(r"\b([A-Z0-9_]+) ([0-9%@]+)(?=[^/])\b")
            for name, val in re.findall(VALS_RE, tegradata):
                name = name.split("_")[0] if "FREQ" in name else name
                hz_data = __val_freq(val)

                # normalize to GPU names
                if name.lower() == "gr3d":
                    name = "gpu"

                if hz_data.get("perc", None) is not None:
                    messages.append(
                        message.Message(
                            name="sys.freq.{name}_perc".format(name=name.lower()),
                            value=hz_data["perc"],
                            timestamp=timestamp,
                            meta={},
                        )
                    )

                # ONLY for APE do we report current frequency as it can't
                #  be found more accurate elsewhere
                if name == "APE" and hz_data.get("freq"):
                    messages.append(
                        message.Message(
                            name="sys.freq.{name}".format(name=name.lower()),
                            value=hz_data["freq"],
                            timestamp=timestamp,
                            meta={},
                        )
                    )

            # populate Wattage data (milliwatts)
            ## ex. VDD_IN 5071/4811 VDD_CPU_GPU_CV 1315/1066 VDD_SOC 1116/1116
            WATT_RE = re.compile(r"\b(\w+) ([0-9.]+)\/([0-9.]+)\b")
            # only the current reading is published; the average is ignored
            for name, current, _avg in re.findall(WATT_RE, tegradata):
                messages.append(
                    message.Message(
                        name="sys.power",
                        value=int(current),
                        timestamp=timestamp,
                        meta={"name": name.lower()},
                    )
                )
        else:
            logging.info("tegrastats did not return any data. skipping...")

    except Exception:
        logging.exception("failed to get tegra system metrics")
Beispiel #15
0
def add_system_metrics_jetson_clocks(args, messages):
    """Add Jetson specific GPU and EMC frequency information to system metrics

    Runs `jetson_clocks --show`, collects its output lines for up to 10
    seconds and appends the minimum, maximum and current frequency found on
    every GPU or EMC line. Failures are logged and never propagate.

    Args:
        args: all program arguments
        messages: the message queue to append metric to
    """
    timestamp = time.time_ns()

    logging.info("collecting system metrics (Jetson Clocks)")

    lines = []
    try:
        with subprocess.Popen(["jetson_clocks", "--show"], stdout=subprocess.PIPE) as process:
            # wait for 10 seconds to get jetson_clocks info
            deadline = time.time() + 10
            while time.time() < deadline:
                ready = select([process.stdout], [], [], 2)[0]
                if not ready:
                    # nothing to read yet, poll again
                    continue
                raw = ready[0].readline()
                if not raw:
                    # an empty read marks the end of the output
                    break
                lines.append(raw.strip().decode())

        if not lines:
            logging.info("jetson_clocks did not return any data. skipping...")
            return

        # populate the GPU and EMC min, max, current frequency
        # (GPU is tried first; a line is attributed to the first pattern it matches)
        clock_patterns = (
            ("gpu", re.compile(r"GPU MinFreq=(\d+) MaxFreq=(\d+) CurrentFreq=(\d+)")),
            ("emc", re.compile(r"EMC MinFreq=(\d+) MaxFreq=(\d+) CurrentFreq=(\d+)")),
        )
        # message name templates, in the order of the regex groups 1, 2, 3
        name_templates = (
            "sys.freq.{name}_min",
            "sys.freq.{name}_max",
            "sys.freq.{name}",
        )
        for line in lines:
            for name, pattern in clock_patterns:
                freqdata = pattern.search(line)
                if freqdata:
                    break
            else:
                # neither a GPU nor an EMC line
                continue

            for group, template in enumerate(name_templates, start=1):
                messages.append(
                    message.Message(
                        name=template.format(name=name),
                        value=int(freqdata.group(group)),
                        timestamp=timestamp,
                        meta={},
                    )
                )

    except Exception:
        logging.exception("failed to get jetson clock system metrics")