def conn_ms_ws():
    """
    Connect to metasmoke websocket.

    Opens a websocket to config_ws["ws_host"], subscribes to the
    "ApiChannel" channel for feedback and post creation events, applies
    config_ws["timeout"] to the socket and returns it.

    On any failure the attempt is logged and retried. When
    config_ws["max_retry"] is greater than -1 (-1 means unlimited), a
    RuntimeError is raised once the number of failures exceeds it.
    If config_ws["retry_sleep"] is truthy, that many seconds are slept
    between attempts.
    """
    failure_count = 0

    while True:
        ws = None
        try:
            ws = websocket.create_connection(config_ws["ws_host"],
                                             origin=config_ws["ms_host"])

            # The channel identifier is itself a JSON document embedded as a
            # string in the subscribe command. Serialising it with json.dumps
            # (instead of string concatenation) escapes the API key properly;
            # the separators reproduce the layout of the hand-built string.
            idf = json.dumps(
                {"channel": "ApiChannel",
                 "key": "{}".format(config_ws["api_key"]),
                 "events": "feedbacks#create;posts#create"},
                separators=(",", ": "))
            payload = json.dumps({"command": "subscribe", "identifier": idf})

            ws.send(payload)
            ws.settimeout(config_ws["timeout"])
            return ws
        except Exception as e:
            # The connection may have been opened before the failure occurred
            # (e.g. send() raised); close it so retries do not leak sockets.
            if ws is not None:
                try:
                    ws.close()
                except Exception:
                    # Best effort only: the socket is being discarded anyway.
                    pass

            msg.output(str(e), msg.WARNING, tags=["WebSocket"])
            failure_count += 1

            if config_ws["max_retry"] > -1:  # -1 is unlimited.
                if failure_count > config_ws["max_retry"]:
                    msg.output("Cannot connect to Metasmoke WebSocket.",
                               msg.ERROR, tags=["WebSocket"])
                    raise RuntimeError(
                        "Cannot connect to Metasmoke WebSocket.")

            if config_ws["retry_sleep"]:
                time.sleep(config_ws["retry_sleep"])
def feedback_over_threshold(post_id, feedbacks):
    """
    Decide whether the feedbacks on a post have crossed the learning threshold.

    feedbacks is an iterable of (user, feedback) pairs. Feedback strings are
    bucketed by prefix: "tp", "fp", and "naa"/"ignore" (counted together).
    Anything else is reported at DEBUG level and ignored.

    Returns True when the post should be learned as tp, False when it should
    be learned as fp, and None when the feedbacks are not decisive yet.
    """
    # Note: post_id can be anything printable with .format()
    # It is only used for outputting and not related to core functionalities.
    tally = {"tp": 0, "fp": 0, "naa": 0}

    for who, verdict in feedbacks:
        if verdict.startswith("tp"):
            tally["tp"] += 1
        elif verdict.startswith("fp"):
            tally["fp"] += 1
        elif verdict.startswith(("naa", "ignore")):
            tally["naa"] += 1
        else:
            msg.output("Feedback {} by {} on post {} not recognized.".format(
                verdict, who, post_id), msg.DEBUG, tags=["Feedback"])

    msg.output("Feedback for post {} extracted as {}/{}/{}.".format(
        post_id, tally["tp"], tally["fp"], tally["naa"]),
        msg.VERBOSE, tags=["Feedback"])

    # naa/ignore feedbacks contribute to both sides with configured weights.
    fb_cfg = ml_config["feedback"]
    w_tp = tally["tp"] + fb_cfg["naa_to_tp"] * tally["naa"]
    w_fp = tally["fp"] + fb_cfg["naa_to_fp"] * tally["naa"]

    msg.output("Weighed feedback for post {} calculated as {}/{}.".format(
        post_id, w_tp, w_fp), msg.VERBOSE, tags=["Feedback"])

    # Unanimous cases: only one side has any weight.
    if w_fp == 0 and w_tp >= fb_cfg["un_thres"]:
        return True
    if w_tp == 0 and w_fp >= fb_cfg["un_thres"]:
        return False

    # Contested case: compare the ratio of the two sides.
    if w_tp and w_fp:
        if w_tp / w_fp >= fb_cfg["co_thres"]:
            return True
        if w_fp / w_tp >= fb_cfg["co_thres"]:
            return False

    # Not over threshold yet
    return None
def depth_first_exec(post_id, output_prefix, route, is_tp, prev_output):
    """
    Perform depth first search on the route tree.

    Runs the executable named by route["exec"], feeding it prev_output, and
    then either reports the result (when route["endpoint"] is truthy) or
    recurses into every entry of route["succ"] with this stage's output.

    post_id is only used in log messages. output_prefix accumulates the
    names of the executables along the route and is used as a log tag.
    Pass None for is_tp if classifying, True if learning as tp and False
    if learning as fp.

    The arguments given to the executable depend on its configured type:
      0 - no arguments;
      1 - "C", "T" or "F" followed by route["data"];
      2 - "C" followed by route["data"] when classifying; when learning,
          "t"/"f" is tried first and, if the executable answers "LEARN",
          it is run again with "T"/"F".

    If the executable produces anything on its error output, a warning is
    logged and this route is terminated.
    """
    output_prefix += "-" + route["exec"]
    exec_info = ml_config["exec"][route["exec"]]
    bin_with_args = [exec_info["bin"]]
    out_and_err = None

    if exec_info["type"] == 0:
        out_and_err = call(bin_with_args, prev_output)
    elif exec_info["type"] == 1:
        if is_tp is None:
            bin_with_args.append("C")
        else:
            bin_with_args.append("{}".format("T" if is_tp else "F"))
        bin_with_args.append("{}".format(route["data"]))
        out_and_err = call(bin_with_args, prev_output)
    elif exec_info["type"] == 2:
        if is_tp is None:
            bin_with_args.append("C")
            bin_with_args.append("{}".format(route["data"]))
            out_and_err = call(bin_with_args, prev_output)
        else:
            bin_with_args.append("t" if is_tp else "f")
            bin_with_args.append("{}".format(route["data"]))
            out_and_err = call(bin_with_args, prev_output)

            if not out_and_err[1]:
                # If out_and_err[1], pass to the handler outside this if block;
                # Proceed to the next learning stage otherwise.
                try:
                    if out_and_err[0].decode("utf-8").rstrip() == "LEARN":
                        # Change "t" or "f" to "T" or "F"
                        bin_with_args[1] = bin_with_args[1].upper()
                        out_and_err = call(bin_with_args, prev_output)
                except Exception as e:
                    # Fake out_and_err. The error slot must be bytes, because
                    # the handler below decodes it like real stderr output;
                    # a plain str here would raise AttributeError and mask
                    # the original problem.
                    out_and_err = (
                        out_and_err[0],
                        "In ml.py, depth_first_exec(): {}: {}".format(
                            type(e).__name__, e).encode("utf-8"))

    if out_and_err[1]:
        err_msg = "Errors occured when {}ing post {}: {}".format(
            "classify" if is_tp is None else "learn",
            post_id,
            out_and_err[1].decode("utf-8").rstrip())
        msg.output(
            err_msg,
            msg.WARNING,
            tags=["Classify" if is_tp is None else "Learn", output_prefix])
        return  # Terminate this route

    if route["endpoint"]:
        out_msg = "Post {} {}: {}".format(
            post_id,
            "classified" if is_tp is None else "learned",
            out_and_err[0].decode("utf-8").rstrip())
        msg.output(
            out_msg,
            msg.INFO if is_tp is None else msg.DEBUG,
            tags=["Classify" if is_tp is None else "Learn", output_prefix])
        return

    for subroute in route["succ"]:
        depth_first_exec(post_id, output_prefix, subroute, is_tp,
                         out_and_err[0])
def ms_ws_listener():
    """
    Metasmoke websocket listener.

    Connects via conn_ms_ws() and then loops forever over received frames:
      * frames that are not valid JSON, "welcome"/"ping" frames and frames
        without a "message" field are logged and skipped;
      * a "reject_subscription" frame raises RuntimeError;
      * a "Post" event is classified through ml.exec_ml();
      * a "Feedback" event fetches the post's feedbacks and, once they are
        over the learning threshold, fetches the post and learns it.

    RuntimeError is logged and re-raised. KeyboardInterrupt closes the
    socket and returns None. Any other exception is logged and followed by
    a reconnect.
    """
    ws = conn_ms_ws()
    msg.output("Connected to Metasmoke WebSocket.",
               msg.DEBUG, tags=["WebSocket"])

    while True:
        try:
            raw = ws.recv()
            try:
                data = json.loads(raw)
            except Exception:
                msg.output("Metasmoke WebSocket response is invalid.",
                           msg.WARNING, tags=["WebSocket"])
                continue

            # Protocol-level frames carry a "type" field.
            if "type" in data:
                frame_type = data["type"]
                if frame_type == "welcome":
                    msg.output("Metasmoke WebSocket welcome message received.",
                               msg.DEBUG, tags=["WebSocket"])
                    continue
                if frame_type == "reject_subscription":
                    msg.output("Metasmoke WebSocket subscription rejected.",
                               msg.CRITICAL, tags=["WebSocket"])
                    raise RuntimeError(
                        "Metasmoke WebSocket subscription rejected.")
                if frame_type == "ping":
                    msg.output("Metasmoke WebSocket ping received.",
                               msg.VERBOSE, tags=["WebSocket"])
                    continue

            if "message" not in data:
                msg.output(
                    "Metasmoke WebSocket response does not contain message field.",
                    msg.DEBUG, tags=["WebSocket"])
                continue

            message = data["message"]
            msg.output("Metasmoke WebSocket message received.",
                       msg.VERBOSE, tags=["WebSocket"])

            event_class = message["event_class"]

            if event_class == "Post":
                # New post created. Analyze it.
                post = message["object"]
                post_id = post["id"]
                msg.output(
                    "New post {} received from Metasmoke WebSocket.".format(
                        post_id),
                    msg.INFO, tags=["WebSocket", "Post"])

                user = dataproc.get_user(post["user_link"], post["username"])
                msg.output("Author of post {} extracted as {}.".format(
                    post_id, user), msg.VERBOSE, tags=["Post"])

                post_tuple = (post["title"], post["body"], user)
                msg.output("Post tuple for {} formed as {}.".format(
                    post_id, post_tuple), msg.VERBOSE, tags=["Post"])

                ml.exec_ml(post_id, post_tuple, None)
            elif event_class == "Feedback":
                # Updates on feedback. Check if over threshold.
                post_id = message["object"]["post_id"]
                msg.output(
                    "New feedback event for post {} received from Metasmoke WebSocket."
                    .format(post_id),
                    msg.DEBUG, tags=["WebSocket", "Feedback"])

                time.sleep(1)  # This is needed due to an issue in MS API.

                try:
                    feedbacks = msapi.get_feedback(post_id)
                except ValueError as e:
                    msg.output("{}".format(e), msg.WARNING,
                               tags=["HTTP", "Feedback"])
                    continue
                msg.output(
                    "Feedbacks for post {} fetched from Metasmoke HTTP API as {}."
                    .format(post_id, feedbacks),
                    msg.VERBOSE, tags=["HTTP", "Feedback"])

                is_over_thres = ml.feedback_over_threshold(post_id, feedbacks)
                if is_over_thres is None:
                    # Not yet.
                    msg.output(
                        "Feedbacks for post {} are insufficient.".format(
                            post_id),
                        msg.DEBUG, tags=["Feedback"])
                    continue

                verdict = "tp" if is_over_thres else "fp"
                msg.output("Post {} registered as {} by feedbacks.".format(
                    post_id, verdict), msg.INFO, tags=["Feedback"])

                # Fetch post to be learned.
                try:
                    post_tuple = msapi.get_post(post_id)
                except ValueError as e:
                    msg.output("{}".format(e), msg.WARNING,
                               tags=["HTTP", "Post"])
                    continue
                msg.output(
                    "Post tuple for {} fetched from Metasmoke HTTP API as {}.".
                    format(post_id, post_tuple),
                    msg.VERBOSE, tags=["HTTP", "Post"])

                ml.exec_ml(post_id, post_tuple, is_over_thres)
        except RuntimeError as e:
            # Severe errors
            msg.output("{}".format(e), msg.ERROR, tags=["Framework"])
            raise
        except KeyboardInterrupt:
            msg.output("User enforced program termination.",
                       msg.DEBUG, tags=["Framework"])
            ws.close()
            return None
        except Exception as e:
            msg.output("{}".format(e), msg.WARNING, tags=["Framework"])

            # Reconnect.
            try:
                ws.close()
            except Exception:
                pass

            ws = conn_ms_ws()
            msg.output("Reconnected to Metasmoke WebSocket.",
                       msg.DEBUG, tags=["WebSocket"])
if __name__ == "__main__":
    # Script entry point: locate and parse the configuration, configure the
    # sub-modules, announce startup and hand control to the listener.

    # The configuration file is selected with a single "--config=<path>"
    # argument; with none (or more than one) the default is used.
    cfg_arg = [x for x in sys.argv if x.startswith("--config=")]
    if len(cfg_arg) != 1:
        cfg_location = "cfg.json"  # Default
    else:
        cfg_location = cfg_arg[0].split("=", 1)[1]

    config = cfgparse.parse(cfg_location)

    startup_str = "SpamSoup {major} ({alias}) started at {major}.{minor} on {user}/{inst}."
    startup_str = startup_str.format(major=ver_info["major"],
                                     alias=ver_info["alias"],
                                     minor=ver_info["minor"],
                                     user=ver_info["user"],
                                     inst=ver_info["inst"])

    # config_ws is read as a module-level name by conn_ms_ws(), so it must be
    # bound before the listener starts.
    config_ws = config["ws"]
    msg.config(config["msg"])
    msapi.config(config["msapi"])
    ml.config(config["ml"])

    msg.output(startup_str, msg.INFO, tags=["Framework"])

    ms_ws_listener()