Esempio n. 1
0
def conn_ms_ws():
    """ Connect to the Metasmoke websocket and subscribe to the API channel.

    Keeps retrying until a connection is opened and the subscribe command
    has been sent. All settings are read from the module-level ``config_ws``
    (``ws_host``, ``ms_host``, ``api_key``, ``timeout``, ``max_retry`` and
    ``retry_sleep``).

    Returns:
        The connected websocket, with its timeout set to
        ``config_ws["timeout"]``.

    Raises:
        RuntimeError: If the number of failed attempts exceeds
            ``config_ws["max_retry"]``. Never raised when ``max_retry``
            is -1 (unlimited retries).
    """
    failure_count = 0
    while True:
        ws = None  # Lets the error handler tell whether a socket was opened.
        try:
            ws = websocket.create_connection(config_ws["ws_host"],
                                             origin=config_ws["ms_host"])
            # The identifier is itself a JSON document, embedded as a string
            # in the subscribe command. Serializing it with json.dumps keeps
            # it valid even if the API key contains quotes or backslashes;
            # the separators reproduce the layout of the string that used to
            # be assembled by hand.
            idf = json.dumps(
                {
                    "channel": "ApiChannel",
                    "key": "{}".format(config_ws["api_key"]),
                    "events": "feedbacks#create;posts#create",
                },
                separators=(",", ": "))
            payload = json.dumps({"command": "subscribe", "identifier": idf})

            ws.send(payload)
            ws.settimeout(config_ws["timeout"])
            return ws
        except Exception as e:
            if ws is not None:
                # Setup failed after the socket was opened: release it before
                # retrying so that failed attempts do not leak sockets.
                try:
                    ws.close()
                except Exception:
                    pass  # Best effort; the socket may already be unusable.

            msg.output(str(e), msg.WARNING, tags=["WebSocket"])
            failure_count += 1
            if config_ws["max_retry"] > -1:  # -1 is unlimited.
                if failure_count > config_ws["max_retry"]:
                    msg.output("Cannot connect to Metasmoke WebSocket.",
                               msg.ERROR,
                               tags=["WebSocket"])
                    raise RuntimeError(
                        "Cannot connect to Metasmoke WebSocket.")

            # Optional pause between attempts; skipped when falsy (e.g. 0).
            if config_ws["retry_sleep"]:
                time.sleep(config_ws["retry_sleep"])
Esempio n. 2
0
def feedback_over_threshold(post_id, feedbacks):
    """ Decide whether the feedbacks on a post pass the learning threshold.

    Args:
        post_id: Anything printable with .format(); used for log output only.
        feedbacks: Iterable of (user, feedback) pairs, where feedback is a
            string classified by its prefix.

    Returns:
        True if the post should be learned as tp, False if as fp, and None
        when the feedbacks are not over the threshold yet.
    """
    # Checked in order; "ignore" feedback is counted together with "naa".
    prefix_buckets = (("tp", "tp"), ("fp", "fp"),
                      ("naa", "naa"), ("ignore", "naa"))
    tally = {"tp": 0, "fp": 0, "naa": 0}

    for user, feedback in feedbacks:
        for prefix, bucket in prefix_buckets:
            if feedback.startswith(prefix):
                tally[bucket] += 1
                break
        else:
            # No known prefix matched.
            unknown_text = "Feedback {} by {} on post {} not recognized.".format(
                feedback, user, post_id)
            msg.output(unknown_text, msg.DEBUG, tags=["Feedback"])

    counts_text = "Feedback for post {} extracted as {}/{}/{}.".format(
        post_id, tally["tp"], tally["fp"], tally["naa"])
    msg.output(counts_text, msg.VERBOSE, tags=["Feedback"])

    # naa feedback contributes to both sides, scaled by configured weights.
    fb_cfg = ml_config["feedback"]
    w_tp = tally["tp"] + fb_cfg["naa_to_tp"] * tally["naa"]
    w_fp = tally["fp"] + fb_cfg["naa_to_fp"] * tally["naa"]
    weighed_text = "Weighed feedback for post {} calculated as {}/{}.".format(
        post_id, w_tp, w_fp)
    msg.output(weighed_text, msg.VERBOSE, tags=["Feedback"])

    # Unanimous case: one side has no weight at all.
    if w_fp == 0 and w_tp >= fb_cfg["un_thres"]:
        return True
    if w_tp == 0 and w_fp >= fb_cfg["un_thres"]:
        return False

    # Contested case: one side must outweigh the other by the given ratio.
    if w_tp and w_fp:
        required_ratio = fb_cfg["co_thres"]
        if w_tp / w_fp >= required_ratio:
            return True
        if w_fp / w_tp >= required_ratio:
            return False

    # Not over threshold yet
    return None
Esempio n. 3
0
def depth_first_exec(post_id, output_prefix, route, is_tp, prev_output):
    """ Perform depth first search on the route tree.

    Runs the executable named by route["exec"] (looked up in
    ml_config["exec"]) through call(), feeding it prev_output. Afterwards
    the result is logged if route["endpoint"] is truthy; otherwise every
    route in route["succ"] is executed recursively with this step's output.
    A step that reports an error terminates its route with a warning.

    Args:
        post_id: Identifier of the post; only used in log messages.
        output_prefix: Log tag accumulated along the route; "-" plus
            route["exec"] is appended for this step.
        route: Route node with the keys "exec", "endpoint" and "succ", plus
            "data" for exec types 1 and 2.
        is_tp: None if classifying, True if learning as tp and False if
            learning as fp.
        prev_output: Output of the previous step, handed on to call().

    Returns:
        None. Results are reported through msg.output().
    """
    # NOTE(review): call() is defined elsewhere; it is used here as returning
    # an (output, error) pair of bytes. Confirm against its definition.

    output_prefix += "-" + route["exec"]
    exec_info = ml_config["exec"][route["exec"]]
    mode_tag = "Classify" if is_tp is None else "Learn"

    bin_with_args = [exec_info["bin"]]
    if exec_info["type"] == 0:
        # Type 0: the binary takes no extra arguments.
        out_and_err = call(bin_with_args, prev_output)
    elif exec_info["type"] == 1:
        # Type 1: mode flag ("C", "T" or "F") followed by the data argument.
        if is_tp is None:
            bin_with_args.append("C")
        else:
            bin_with_args.append("{}".format("T" if is_tp else "F"))
        bin_with_args.append("{}".format(route["data"]))
        out_and_err = call(bin_with_args, prev_output)
    elif exec_info["type"] == 2:
        if is_tp is None:
            bin_with_args.append("C")
            bin_with_args.append("{}".format(route["data"]))
            out_and_err = call(bin_with_args, prev_output)
        else:
            # Type 2 learns in two stages: a first run with "t"/"f", and a
            # second run with "T"/"F" only if the first run answers "LEARN".
            bin_with_args.append("t" if is_tp else "f")
            bin_with_args.append("{}".format(route["data"]))
            out_and_err = call(bin_with_args, prev_output)
            if not out_and_err[1]:
                # If out_and_err[1], pass to the handler outside this if block;
                # Proceed to the next learning stage otherwise.
                try:
                    if out_and_err[0].decode("utf-8").rstrip() == "LEARN":
                        # Change "t" or "f" to "T" or "F"
                        bin_with_args[1] = bin_with_args[1].upper()
                        out_and_err = call(bin_with_args, prev_output)
                except Exception as e:
                    # Fake out_and_err. The error slot must hold bytes like a
                    # real one, because the handler below decodes it.
                    fake_err = "In ml.py, depth_first_exec(): {}: {}".format(
                        type(e).__name__, e)
                    out_and_err = (out_and_err[0], fake_err.encode("utf-8"))
    else:
        # Unknown exec type: nothing was run, so there is no output to
        # inspect. Report it and stop instead of failing on a missing result.
        msg.output(
            "Unknown exec type {} for {}.".format(exec_info["type"],
                                                  route["exec"]),
            msg.WARNING,
            tags=[mode_tag, output_prefix])
        return  # Terminate this route

    if out_and_err[1]:
        err_msg = "Errors occured when {}ing post {}: {}".format(
            "classify" if is_tp is None else "learn", post_id,
            out_and_err[1].decode("utf-8").rstrip())
        msg.output(err_msg, msg.WARNING, tags=[mode_tag, output_prefix])
        return  # Terminate this route

    if route["endpoint"]:
        out_msg = "Post {} {}: {}".format(
            post_id, "classified" if is_tp is None else "learned",
            out_and_err[0].decode("utf-8").rstrip())
        msg.output(
            out_msg,
            msg.INFO if is_tp is None else msg.DEBUG,
            tags=[mode_tag, output_prefix])
        return

    # Not an endpoint: feed this step's output to every successor route.
    for subroute in route["succ"]:
        depth_first_exec(post_id, output_prefix, subroute, is_tp,
                         out_and_err[0])
Esempio n. 4
0
def _analyze_new_post(message):
    """ Handle a "Post" event: build the post tuple and classify it. """
    post = message["object"]
    post_id = post["id"]
    msg.output(
        "New post {} received from Metasmoke WebSocket.".format(post_id),
        msg.INFO,
        tags=["WebSocket", "Post"])

    author = dataproc.get_user(post["user_link"], post["username"])
    msg.output("Author of post {} extracted as {}.".format(post_id, author),
               msg.VERBOSE,
               tags=["Post"])

    post_tuple = (post["title"], post["body"], author)
    msg.output("Post tuple for {} formed as {}.".format(post_id, post_tuple),
               msg.VERBOSE,
               tags=["Post"])

    # None as the last argument means classify rather than learn.
    ml.exec_ml(post_id, post_tuple, None)


def _learn_from_feedback(message):
    """ Handle a "Feedback" event: learn the post once over threshold. """
    post_id = message["object"]["post_id"]
    msg.output(
        "New feedback event for post {} received from Metasmoke WebSocket."
        .format(post_id),
        msg.DEBUG,
        tags=["WebSocket", "Feedback"])

    time.sleep(1)  # This is needed due to an issue in MS API.

    try:
        feedbacks = msapi.get_feedback(post_id)
    except ValueError as e:
        msg.output("{}".format(e), msg.WARNING, tags=["HTTP", "Feedback"])
        return
    msg.output(
        "Feedbacks for post {} fetched from Metasmoke HTTP API as {}."
        .format(post_id, feedbacks),
        msg.VERBOSE,
        tags=["HTTP", "Feedback"])

    verdict = ml.feedback_over_threshold(post_id, feedbacks)
    if verdict is None:
        # Not yet.
        msg.output("Feedbacks for post {} are insufficient.".format(post_id),
                   msg.DEBUG,
                   tags=["Feedback"])
        return

    msg.output(
        "Post {} registered as {} by feedbacks.".format(
            post_id, "tp" if verdict else "fp"),
        msg.INFO,
        tags=["Feedback"])

    # Fetch post to be learned.
    try:
        post_tuple = msapi.get_post(post_id)
    except ValueError as e:
        msg.output("{}".format(e), msg.WARNING, tags=["HTTP", "Post"])
        return
    msg.output(
        "Post tuple for {} fetched from Metasmoke HTTP API as {}.".format(
            post_id, post_tuple),
        msg.VERBOSE,
        tags=["HTTP", "Post"])

    ml.exec_ml(post_id, post_tuple, verdict)


def ms_ws_listener():
    """ Listen on the Metasmoke websocket and dispatch incoming events.

    Runs until interrupted from the keyboard (returns None) or until a
    RuntimeError occurs (logged, then re-raised). Any other exception is
    logged and followed by a reconnect.
    """
    ws = conn_ms_ws()
    msg.output("Connected to Metasmoke WebSocket.",
               msg.DEBUG,
               tags=["WebSocket"])
    while True:
        try:
            raw = ws.recv()
            try:
                data = json.loads(raw)
            except Exception:
                msg.output("Metasmoke WebSocket response is invalid.",
                           msg.WARNING,
                           tags=["WebSocket"])
                continue

            # Control frames carry a "type" field and no payload to process.
            if "type" in data:
                frame_type = data["type"]
                if frame_type == "welcome":
                    msg.output("Metasmoke WebSocket welcome message received.",
                               msg.DEBUG,
                               tags=["WebSocket"])
                    continue
                elif frame_type == "reject_subscription":
                    msg.output("Metasmoke WebSocket subscription rejected.",
                               msg.CRITICAL,
                               tags=["WebSocket"])
                    raise RuntimeError(
                        "Metasmoke WebSocket subscription rejected.")
                elif frame_type == "ping":
                    msg.output("Metasmoke WebSocket ping received.",
                               msg.VERBOSE,
                               tags=["WebSocket"])
                    continue

            if "message" not in data:
                msg.output(
                    "Metasmoke WebSocket response does not contain message field.",
                    msg.DEBUG,
                    tags=["WebSocket"])
                continue

            message = data["message"]
            msg.output("Metasmoke WebSocket message received.",
                       msg.VERBOSE,
                       tags=["WebSocket"])

            event_class = message["event_class"]
            if event_class == "Post":
                # New post created. Analyze it.
                _analyze_new_post(message)
            elif event_class == "Feedback":
                # Updates on feedback. Check if over threshold.
                _learn_from_feedback(message)
        except RuntimeError as e:
            # Severe errors
            msg.output("{}".format(e), msg.ERROR, tags=["Framework"])
            raise
        except KeyboardInterrupt:
            msg.output("User enforced program termination.",
                       msg.DEBUG,
                       tags=["Framework"])
            ws.close()
            return None
        except Exception as e:
            msg.output("{}".format(e), msg.WARNING, tags=["Framework"])
            # Reconnect. Closing the old socket is best effort only.
            try:
                ws.close()
            except Exception:
                pass
            ws = conn_ms_ws()
            msg.output("Reconnected to Metasmoke WebSocket.",
                       msg.DEBUG,
                       tags=["WebSocket"])
Esempio n. 5
0
                pass
            ws = conn_ms_ws()
            msg.output("Reconnected to Metasmoke WebSocket.",
                       msg.DEBUG,
                       tags=["WebSocket"])


if __name__ == "__main__":
    # The configuration file is taken from a --config=<path> argument when
    # exactly one is given; otherwise the default location is used.
    cfg_arg = [arg for arg in sys.argv if arg.startswith("--config=")]
    cfg_location = (cfg_arg[0].partition("=")[2]
                    if len(cfg_arg) == 1 else "cfg.json")

    config = cfgparse.parse(cfg_location)

    startup_str = (
        "SpamSoup {major} ({alias}) started at {major}.{minor} on {user}/{inst}."
    ).format(major=ver_info["major"],
             alias=ver_info["alias"],
             minor=ver_info["minor"],
             user=ver_info["user"],
             inst=ver_info["inst"])

    # config_ws stays a module-level name: conn_ms_ws() reads it as a global.
    config_ws = config["ws"]
    for configure, section in ((msg.config, "msg"),
                               (msapi.config, "msapi"),
                               (ml.config, "ml")):
        configure(config[section])

    msg.output(startup_str, msg.INFO, tags=["Framework"])
    ms_ws_listener()