def dispatch_reply_command(msg, reply, full_cmd, comment=True):
    """Run the reply command named in *full_cmd* against *msg*.

    msg     -- the message being replied to (typically a report)
    reply   -- the reply message that contains the command text
    comment -- when the text is not a known command, allow queueing it as an
               auto-comment on metasmoke for privileged users
    """
    name, _, tail = full_cmd.partition(" ")
    name = name.lower()
    # A trailing "-" marks a "quiet" invocation (suppress the usual reply).
    quiet_action = name[-1] == "-"
    name = regex.sub(r"\W*$", "", name)
    if name in _reply_commands:
        handler, (min_arity, max_arity) = _reply_commands[name]
        assert min_arity == 1
        if max_arity == 1:
            return handler(msg, original_msg=reply, alias_used=name, quiet_action=quiet_action)
        if max_arity == 2:
            return handler(msg, tail, original_msg=reply, alias_used=name, quiet_action=quiet_action)
        # Variadic command: split the remainder and pad with None up to arity.
        extra = tail.split()
        extra.extend([None] * (max_arity - len(extra)))
        return handler(msg, *extra, original_msg=reply, alias_used=name, quiet_action=quiet_action)
    if comment and is_privileged(reply.owner, reply.room):
        post_data = get_report_data(msg)
        if post_data:
            Tasks.do(metasmoke.Metasmoke.post_auto_comment, full_cmd, reply.owner, url=post_data[0])
def _start(self):
    """Run the deletion-watcher websocket loop (intended for a daemon thread).

    Echoes "hb" heartbeats; on a "post-deleted" event for a watched action it
    unsubscribes, reports deletion stats to metasmoke and fires any registered
    callbacks whose max_time deadline has not passed.
    """
    while True:
        msg = self.socket.recv()
        if msg:
            msg = json.loads(msg)
            action = msg["action"]
            if action == "hb":
                # Server heartbeat; echo it back to keep the socket alive.
                self.socket.send("hb")
            else:
                # Site events carry a JSON-encoded payload in "data".
                data = json.loads(msg["data"])
                if data["a"] == "post-deleted":
                    try:
                        post_id, _, post_type, post_url, callbacks = self.posts[action]
                        del self.posts[action]
                        # Answers only match when the event's answer ID equals
                        # the subscribed post; questions always match.
                        if not post_type == "answer" or ("aId" in data and str(data["aId"]) == post_id):
                            # "-<action>" unsubscribes from this question feed.
                            self.socket.send("-" + action)
                            Tasks.do(metasmoke.Metasmoke.send_deletion_stats_for_post, post_url, True)
                            for callback, max_time in callbacks:
                                if not max_time or time.time() < max_time:
                                    callback()
                    except KeyError:
                        # Event for a question we are not (or no longer) watching.
                        pass
def _start(self):
    """Run the deletion-watcher websocket loop (intended for a daemon thread).

    Thread-safe variant: all reads and writes of self.posts happen under
    self.posts_lock, and unsubscribing is delegated to self._unsubscribe.
    """
    while True:
        msg = self.socket.recv()
        if msg:
            msg = json.loads(msg)
            action = msg["action"]
            if action == "hb":
                # Echo heartbeats to keep the connection alive.
                self.socket.send("hb")
            else:
                data = json.loads(msg["data"])
                if data["a"] == "post-deleted":
                    try:
                        with self.posts_lock:
                            post_id, _, _, post_url, callbacks = self.posts[action]
                        # Match answers on "aId", questions on "qId".
                        if post_id == str(data["aId"] if "aId" in data else data["qId"]):
                            with self.posts_lock:
                                del self.posts[action]
                            Tasks.do(self._unsubscribe, action)
                            Tasks.do(metasmoke.Metasmoke.send_deletion_stats_for_post, post_url, True)
                            for callback, max_time in callbacks:
                                if not max_time or time.time() < max_time:
                                    callback()
                    except KeyError:
                        # Not watching this question (anymore).
                        pass
def __init__(self):
    """Set up the edit watcher: restore pickled subscriptions and start the loop."""
    # Honor the global kill switch; leave self.socket as None so other
    # methods can detect that edit watching is disabled.
    if GlobalVars.no_edit_watcher:
        self.socket = None
        return
    # posts maps the WebSocket action "{site_id}-question-{question_id}"
    # to a (site_id, hostname, question_id, max_time) tuple.
    self.posts = {}
    self.posts_lock = threading.Lock()
    # Debounce handle for persisting self.posts (see _schedule_save).
    self.save_handle = None
    self.save_handle_lock = threading.Lock()
    try:
        self.socket = websocket.create_connection("wss://qa.sockets.stackexchange.com/")
    except websocket.WebSocketException:
        self.socket = None
        log('error', 'EditWatcher failed to create a websocket connection')
    if datahandling.has_pickle(PICKLE_FILENAME):
        pickle_data = datahandling.load_pickle(PICKLE_FILENAME)
        now = time.time()
        # Keep only subscriptions whose deadline (value[-1]) is still ahead.
        # NOTE(review): this unpacks (action, value) pairs directly from
        # pickle_data; if load_pickle returns a dict, iterating it yields
        # keys only — verify whether `.items()` is intended here.
        new_posts = {action: value for action, value in pickle_data if value[-1] > now}
        with self.posts_lock:
            self.posts = new_posts
        for action in new_posts.keys():
            Tasks.do(self._subscribe, action)
        self._schedule_save()
    threading.Thread(name="edit watcher", target=self._start, daemon=True).start()
def subscribe(self, post_url, callback=None, pickle=True, timeout=None):
    """Watch *post_url* for deletion over the shared SE websocket.

    callback -- optional callable fired when the post is deleted
    pickle   -- persist the subscription list afterwards
    timeout  -- seconds after which the callback is no longer honored
    """
    post_id, post_site, post_type = fetch_post_id_and_site_from_url(post_url)
    if post_site not in GlobalVars.site_id_dict:
        log("warning", "unknown site {} when subscribing to {}".format(post_site, post_url))
        return
    if post_type == "answer":
        # The websocket feed is per-question, so resolve the parent question.
        question_id = datahandling.get_post_site_id_link((post_id, post_site, post_type))
        if question_id is None:
            return
    else:
        question_id = post_id
    site_id = GlobalVars.site_id_dict[post_site]
    action = "{}-question-{}".format(site_id, question_id)
    max_time = (time.time() + timeout) if timeout else None
    if action not in self.posts:
        self.posts[action] = (post_id, post_site, post_type, post_url,
                              [(callback, max_time)] if callback else [])
        try:
            self.socket.send(action)
        except websocket.WebSocketException:
            log('error', 'DeletionWatcher failed on sending {}'.format(action))
    elif callback:
        # Already watching this question; just register another callback.
        _, _, _, _, callbacks = self.posts[action]
        callbacks.append((callback, max_time))
    else:
        # Already watching and nothing new to record.
        return
    if pickle:
        Tasks.do(self._save)
def subscribe(self, post_url, callback=None, pickle=True, timeout=None):
    """Watch *post_url* for deletion over the shared SE websocket.

    callback -- optional callable fired when the post is deleted
    pickle   -- persist the subscription list afterwards
    timeout  -- seconds after which the callback is no longer honored
    """
    post_id, post_site, post_type = fetch_post_id_and_site_from_url(post_url)
    if post_site not in GlobalVars.site_id_dict:
        log("warning", "unknown site {} when subscribing to {}".format(post_site, post_url))
        return
    if post_type == "answer":
        # The websocket feed is per-question, so resolve the parent question.
        question_id = datahandling.get_post_site_id_link((post_id, post_site, post_type))
        if question_id is None:
            return
    else:
        question_id = post_id
    site_id = GlobalVars.site_id_dict[post_site]
    action = "{}-question-{}".format(site_id, question_id)
    max_time = (time.time() + timeout) if timeout else None
    if action not in self.posts:
        self.posts[action] = (post_id, post_site, post_type, post_url,
                              [(callback, max_time)] if callback else [])
        # Fix: guard the send like the sibling subscribe() variants do, so a
        # broken websocket doesn't take down the caller.
        try:
            self.socket.send(action)
        except websocket.WebSocketException:
            log('error', 'DeletionWatcher failed on sending {}'.format(action))
    elif callback:
        # Fix: a repeated subscribe without a callback previously CLOBBERED
        # the existing entry (losing registered callbacks) and re-sent the
        # action; now it just registers the extra callback or returns.
        _, _, _, _, callbacks = self.posts[action]
        callbacks.append((callback, max_time))
    else:
        return
    if pickle:
        Tasks.do(self._save)
def subscribe(self, post_url=None, hostname=None, site_id=None, question_id=None, pickle=True,
              timeout=DEFAULT_TIMEOUT, max_time=None, from_time=None):
    """Subscribe the edit watcher to one or more questions.

    Either pass *post_url* (answers are resolved to their parent question),
    or pass *question_id* (an ID or a list of IDs) together with *hostname*
    and/or *site_id*. *max_time*/*from_time*/*timeout* bound how long the
    subscription stays active.
    """
    if GlobalVars.no_edit_watcher:
        return
    if post_url and not ((hostname or site_id) and question_id):
        post_id, hostname, post_type = fetch_post_id_and_site_from_url(post_url)
        if post_type == "answer":
            question_id = datahandling.get_post_site_id_link((post_id, hostname, post_type))
            if question_id is None:
                log("warning", "Unable to get question ID when subscribing to: hostname: "
                    "{} :: post ID:{} when subscribing to {}".format(hostname, post_id, post_url))
                return
        else:
            question_id = post_id
            if post_type != "question":
                log("warning", "tried to edit-watch non-question: hostname: "
                    "{} :: post ID:{} when subscribing to {}".format(hostname, question_id, post_url))
                return
    if not site_id or not hostname:
        # Resolve whichever of site_id/hostname is missing from the other.
        with GlobalVars.site_id_dict_lock:
            if not site_id and hostname:
                site_id = GlobalVars.site_id_dict.get(hostname)
            if site_id and not hostname:
                hostname = GlobalVars.site_id_dict_by_id.get(site_id)
        if not site_id or not hostname:
            log("warning", "unable to determine a valid site ID or hostname when subscribing to question ID "
                "{}:: site_id:{}:: hostname:{}:: post_url:{}".format(question_id, site_id, hostname, post_url))
            return
    question_ids = question_id if isinstance(question_id, list) else [question_id]
    now = from_time if from_time else time.time()
    if not max_time:
        max_time = now + timeout
    updated = None
    to_subscribe = []
    with self.posts_lock:
        for question_id in question_ids:
            action = "{}-question-{}".format(site_id, question_id)
            if action not in self.posts:
                self.posts[action] = (site_id, hostname, question_id, max_time)
                to_subscribe.append(action)
                # Fix: new subscriptions must mark the state as updated, or
                # the pickle save below never runs.
                updated = True
            else:
                # Fix: the stored tuple is (site_id, hostname, question_id,
                # max_time); the old code read index 2 (question_id) here.
                old_max_time = self.posts[action][3]
                if max_time > old_max_time:
                    self.posts[action] = (site_id, hostname, question_id, max_time)
                    updated = True
                elif updated is None:
                    updated = False
    # Fix: removed a leftover debug print of each scheduled action.
    for action in to_subscribe:
        Tasks.do(self._subscribe, action)
    if updated and pickle:
        self._schedule_save()
def _start(self):
    """Run the deletion-watcher websocket loop (intended for a daemon thread).

    Echoes "hb" heartbeats; on a "post-deleted" event for a watched action it
    unsubscribes, reports deletion stats to metasmoke and fires any registered
    callbacks whose max_time deadline has not passed.
    """
    while True:
        msg = self.socket.recv()
        if msg:
            msg = json.loads(msg)
            action = msg["action"]
            if action == "hb":
                # Server heartbeat; echo it back to keep the socket alive.
                self.socket.send("hb")
            else:
                data = json.loads(msg["data"])
                if data["a"] == "post-deleted":
                    try:
                        post_id, _, post_type, post_url, callbacks = self.posts[action]
                        del self.posts[action]
                        # Answers only match when the event's answer ID equals
                        # the subscribed post; questions always match.
                        if not post_type == "answer" or ("aId" in data and str(data["aId"]) == post_id):
                            # "-<action>" unsubscribes from this question feed.
                            self.socket.send("-" + action)
                            Tasks.do(metasmoke.Metasmoke.send_deletion_stats_for_post, post_url, True)
                            for callback, max_time in callbacks:
                                if not max_time or time.time() < max_time:
                                    callback()
                    except KeyError:
                        # Event for a question we are not (or no longer) watching.
                        pass
def handle_spam(post, reasons, why):
    """Record a detected spam post and announce it to the chat rooms.

    post    -- the detected Post object
    reasons -- list of reason strings that matched
    why     -- human-readable explanation stored alongside the report
    """
    datahandling.append_to_latest_questions(post.post_site, post.post_id,
                                            post.title if not post.is_answer else "")
    # Low-signal, single-reason detections are auto-ignored for a while.
    if len(reasons) == 1 and ("all-caps title" in reasons or
                              "repeating characters in title" in reasons or
                              "repeating characters in body" in reasons or
                              "repeating characters in answer" in reasons or
                              "repeating words in title" in reasons or
                              "repeating words in body" in reasons or
                              "repeating words in answer" in reasons):
        datahandling.add_auto_ignored_post((post.post_id, post.post_site, datetime.utcnow()))
    if why is not None and why != "":
        datahandling.add_why(post.post_site, post.post_id, why)
    if post.is_answer and post.post_id is not None and post.post_id != "":
        datahandling.add_post_site_id_link((post.post_id, post.post_site, "answer"), post.parent.post_id)
    try:
        post_url = parsing.to_protocol_relative(parsing.url_to_shortlink(post.post_url))
        poster_url = parsing.to_protocol_relative(parsing.user_url_to_shortlink(post.user_url))
        if not post.user_name.strip() or (not poster_url or poster_url.strip() == ""):
            username = ""
        else:
            username = post.user_name.strip()
        Tasks.do(metasmoke.Metasmoke.send_stats_on_post, post.title_ignore_type, post_url, reasons,
                 post.body, username, post.user_link, why, post.owner_rep, post.post_score,
                 post.up_vote_count, post.down_vote_count)
        # When the title itself is offensive, build a second ("clean") message
        # with a placeholder title for rooms that opt out of the raw title.
        offensive_mask = 'offensive title detected' in reasons
        message = build_message(post, reasons)
        if offensive_mask:
            post.title = "(potentially offensive title -- see MS for details)"
            clean_message = build_message(post, reasons)
        log('debug', GlobalVars.parser.unescape(message).encode('ascii', errors='replace'))
        GlobalVars.deletion_watcher.subscribe(post_url)
        without_roles = tuple(["no-" + reason for reason in reasons]) + ("site-no-" + post.post_site,)
        # Purely-experimental detections go to the experimental rooms only.
        if set(reasons) - GlobalVars.experimental_reasons == set() and \
                not why.startswith("Post manually "):
            chatcommunicate.tell_rooms(message,
                                       ("experimental-all-sites", "experimental-site-" + post.post_site),
                                       without_roles, notify_site=post.post_site,
                                       report_data=(post_url, poster_url))
        else:
            if offensive_mask:
                chatcommunicate.tell_rooms(message, ("all-sites", "site-" + post.post_site),
                                           without_roles + ("offensive-mask",),
                                           notify_site=post.post_site, report_data=(post_url, poster_url))
                chatcommunicate.tell_rooms(clean_message, ("all-sites", "site-" + post.post_site),
                                           without_roles + ("no-offensive-mask",),
                                           notify_site=post.post_site, report_data=(post_url, poster_url))
            else:
                chatcommunicate.tell_rooms(message, ("all-sites", "site-" + post.post_site), without_roles,
                                           notify_site=post.post_site, report_data=(post_url, poster_url))
    except Exception as e:
        # Reporting must never kill the scanner; route to the excepthook.
        excepthook.uncaught_exception(*sys.exc_info())
def on_msg(msg, client):
    """Handle a posted/edited chat message: run commands and queue replies."""
    global _room_roles
    if not isinstance(msg, events.MessagePosted) and not isinstance(msg, events.MessageEdited):
        return
    message = msg.message
    room_ident = (client.host, message.room.id)
    with _room_roles_lock:
        if message.owner.id == client._br.user_id:
            # Our own message: only meaningful as SocketScience traffic in
            # rooms with the 'direct' role (zero-width chars are padding).
            if 'direct' in _room_roles and room_ident in _room_roles['direct']:
                SocketScience.receive(message.content_source.replace("\u200B", "").replace("\u200C", ""))
            return
    if message.content.startswith("<div class='partial'>"):
        # Strip the wrapper chat adds around truncated messages.
        message.content = message.content[21:]
        if message.content.endswith("</div>"):
            message.content = message.content[:-6]
    if message.parent:
        try:
            if message.parent.owner.id == client._br.user_id:
                # Reply to one of our messages: strip the @mention and
                # dispatch the remainder as a reply command.
                strip_mention = regex.sub("^(<span class=(\"|')mention(\"|')>)?@.*?(</span>)? ", "",
                                          message.content)
                cmd = GlobalVars.parser.unescape(strip_mention)
                result = dispatch_reply_command(message.parent, message, cmd)
                send_reply_if_not_blank(room_ident, message.id, result)
        except ValueError:
            pass
    elif message.content.lower().startswith("sd "):
        result = dispatch_shorthand_command(message)
        send_reply_if_not_blank(room_ident, message.id, result)
    elif message.content.startswith("!!/") or message.content.lower().startswith("sdc "):
        result = dispatch_command(message)
        send_reply_if_not_blank(room_ident, message.id, result)
    elif classes.feedback.FEEDBACK_REGEX.search(message.content) \
            and is_privileged(message.owner, message.room) and datahandling.last_feedbacked:
        # Bare feedback from a privileged user applies to the last report(s).
        ids, expires_in = datahandling.last_feedbacked
        if time.time() < expires_in:
            Tasks.do(metasmoke.Metasmoke.post_auto_comment, message.content_source, message.owner, ids=ids)
    else:
        with _room_roles_lock:
            if 'direct' in _room_roles and room_ident in _room_roles['direct']:
                SocketScience.receive(message.content_source.replace("\u200B", "").replace("\u200C", ""))
def on_msg(msg, client):
    """Handle a posted/edited chat message: run commands and queue replies."""
    global _room_roles
    if not isinstance(msg, events.MessagePosted) and not isinstance(msg, events.MessageEdited):
        return
    message = msg.message
    room_ident = (client.host, message.room.id)
    room_data = _rooms[room_ident]

    def _reply_if_not_blank(result):
        # Deduplicated reply formatting (was copy-pasted three times):
        # long single-line results (>= 488 chars) need the ":id\ntext" form,
        # otherwise the compact ":id text" reply is used.
        if result:
            s = ":{}\n{}" if "\n" not in result and len(result) >= 488 else ":{} {}"
            _msg_queue.put((room_data, s.format(message.id, result), None))

    if message.owner.id == client._br.user_id:
        # Our own message: only meaningful as SocketScience traffic in
        # rooms with the 'direct' role.
        if 'direct' in _room_roles and room_ident in _room_roles['direct']:
            SocketScience.receive(message.content_source.replace("\u200B", "").replace("\u200C", ""))
        return
    if message.content.startswith("<div class='partial'>"):
        # Strip the wrapper chat adds around truncated messages.
        message.content = message.content[21:]
        if message.content.endswith("</div>"):
            message.content = message.content[:-6]
    if message.parent:
        try:
            if message.parent.owner.id == client._br.user_id:
                # Reply to one of our messages: strip the @mention and
                # dispatch the remainder as a reply command.
                strip_mention = regex.sub("^(<span class=(\"|')mention(\"|')>)?@.*?(</span>)? ", "",
                                          message.content)
                cmd = GlobalVars.parser.unescape(strip_mention)
                _reply_if_not_blank(dispatch_reply_command(message.parent, message, cmd))
        except ValueError:
            pass
    elif message.content.lower().startswith("sd "):
        _reply_if_not_blank(dispatch_shorthand_command(message))
    elif message.content.startswith("!!/"):
        _reply_if_not_blank(dispatch_command(message))
    elif classes.feedback.FEEDBACK_REGEX.search(message.content) \
            and is_privileged(message.owner, message.room) and datahandling.last_feedbacked:
        # Bare feedback from a privileged user applies to the last report(s).
        ids, expires_in = datahandling.last_feedbacked
        if time.time() < expires_in:
            Tasks.do(metasmoke.Metasmoke.post_auto_comment, message.content_source, message.owner, ids=ids)
    elif 'direct' in _room_roles and room_ident in _room_roles['direct']:
        SocketScience.receive(message.content_source.replace("\u200B", "").replace("\u200C", ""))
def on_msg(msg, client):
    """Handle a posted/edited chat message: dispatch commands and queue replies."""
    if not isinstance(msg, events.MessagePosted) and not isinstance(msg, events.MessageEdited):
        return
    message = msg.message
    if message.owner.id == client._br.user_id:
        # Ignore our own messages.
        return
    if message.content.startswith("<div class='partial'>"):
        # Strip the wrapper chat adds around truncated messages.
        message.content = message.content[21:]
        if message.content.endswith("</div>"):
            message.content = message.content[:-6]
    room_data = _rooms[(client.host, message.room.id)]
    if message.parent:
        try:
            if message.parent.owner.id == client._br.user_id:
                # Reply to one of our messages: strip the @mention and
                # dispatch the remainder as a reply command.
                strip_mention = regex.sub("^(<span class=(\"|')mention(\"|')>)?@.*?(</span>)? ", "",
                                          message.content)
                cmd = GlobalVars.parser.unescape(strip_mention)
                result = dispatch_reply_command(message.parent, message, cmd)
                if result:
                    _msg_queue.put((room_data, ":{} {}".format(message.id, result), None))
        except ValueError:
            pass
    elif message.content.lower().startswith("sd "):
        result = dispatch_shorthand_command(message)
        if result:
            _msg_queue.put((room_data, ":{} {}".format(message.id, result), None))
    elif message.content.startswith("!!/"):
        result = dispatch_command(message)
        if result:
            _msg_queue.put((room_data, ":{} {}".format(message.id, result), None))
    elif classes.feedback.FEEDBACK_REGEX.search(message.content) \
            and is_privileged(message.owner, message.room) and datahandling.last_feedbacked:
        # Bare feedback from a privileged user applies to the last report(s).
        ids, expires_in = datahandling.last_feedbacked
        if time.time() < expires_in:
            Tasks.do(metasmoke.Metasmoke.post_auto_comment, message.content_source, message.owner, ids=ids)
def sum_weight(reasons: list):
    """Return the total metasmoke weight of all known reasons in *reasons*.

    Unknown reasons contribute nothing; a parenthesised suffix on a reason
    (e.g. "blacklisted user (recently reported)") is stripped before lookup.
    The cached weight table is refreshed in the background when stale.
    """
    if not GlobalVars.reason_weights:
        datahandling.update_reason_weights()
    now = datetime.utcnow() - timedelta(minutes=15)
    # Refresh at most once per day, and only after 01:00 UTC (minus slack).
    if now.date() != GlobalVars.reason_weights['last_updated'] and now.hour >= 1:
        Tasks.do(datahandling.update_reason_weights)
    weights = GlobalVars.reason_weights
    total = 0
    for reason in reasons:
        key = reason
        if "(" in key:
            # Drop any parenthesised qualifier before the lookup.
            key = regex.sub(r"\s*\(.*$", "", key)
        try:
            total += weights[key.lower()]
        except KeyError:
            # Unknown reason: contributes 0.
            pass
    return total
def schedule_store_recently_scanned_posts():
    """Debounce persisting the recently-scanned-posts cache.

    Cancels any still-pending save task and schedules a fresh one, so rapid
    successive calls result in a single store.
    """
    global recently_scanned_posts_save_handle
    with recently_scanned_posts_save_handle_lock:
        pending = recently_scanned_posts_save_handle
        if pending:
            pending.cancel()
        recently_scanned_posts_save_handle = Tasks.do(store_recently_scanned_posts)
def subscribe(self, post_url, callback=None, pickle=True, timeout=None):
    """Watch *post_url* for deletion over the shared SE websocket.

    callback -- optional callable fired when the post is deleted
    pickle   -- persist the subscription list afterwards
    timeout  -- seconds after which the callback is no longer honored
    """
    # Honor the global kill switch.
    if GlobalVars.no_deletion_watcher:
        return
    post_id, post_site, post_type = fetch_post_id_and_site_from_url(post_url)
    if post_site not in GlobalVars.site_id_dict:
        log("warning", "unknown site {} when subscribing to {}".format(post_site, post_url))
        return
    if post_type == "answer":
        # The websocket feed is per-question, so resolve the parent question.
        question_id = datahandling.get_post_site_id_link((post_id, post_site, post_type))
        if question_id is None:
            return
    else:
        question_id = post_id
    site_id = GlobalVars.site_id_dict[post_site]
    action = "{}-question-{}".format(site_id, question_id)
    max_time = (time.time() + timeout) if timeout else None
    if action not in self.posts:
        self.posts[action] = (post_id, post_site, post_type, post_url,
                              [(callback, max_time)] if callback else [])
        try:
            self.socket.send(action)
        except websocket.WebSocketException:
            log('error', 'DeletionWatcher failed on sending {}'.format(action))
    elif callback:
        # Already watching this question; just register another callback.
        _, _, _, _, callbacks = self.posts[action]
        callbacks.append((callback, max_time))
    else:
        # Already watching and nothing new to record.
        return
    if pickle:
        Tasks.do(self._save)
def check_queue(self):
    """Select queued sites that qualify for an API scan and process them."""
    # This should be called once in a new Thread every time we add an entry to the queue. Thus, we
    # should only need to process a single queue entry in order to keep the queue from containing
    # entries which are qualified for processing, but which haven't been processed. However, that
    # doesn't account for the possibility of things going wrong and/or implementing some other
    # way to qualify other than the depth of the queue for a particular site (e.g. time in queue).
    # We use a copy of the queue in order to allow the queue to be changed in other threads.
    # This is OK, because self.make_api_call_for_site(site) verifies that the site
    # is still in the queue.
    sites_to_handle = []
    # Time-sensitive sites get priority during 04:00-11:59 UTC.
    is_time_sensitive_time = datetime.utcnow().hour in range(4, 12)
    with self.queue_lock:
        sites_in_queue = {site: len(values) for site, values in self.queue.items()}
    # Get sites listed in special cases and as time_sensitive
    for site, length in sites_in_queue.items():
        if site in self.special_cases:
            if length >= self.special_cases[site]:
                sites_to_handle.append(site)
            continue
        if is_time_sensitive_time and site in self.time_sensitive and length >= 1:
            sites_to_handle.append(site)
    # Remove the sites which we've handled from our copy of the queue.
    for site in sites_to_handle:
        sites_in_queue.pop(site, None)
    # if we don't have any sites with their queue filled, take the first one without a special case
    for site, length in sites_in_queue.items():
        if site not in self.special_cases and length >= self.threshold:
            sites_to_handle.append(site)
    for site in sites_to_handle:
        self.make_api_call_for_site(site)
    if not sites_to_handle:
        # We're not making an API request, so explicitly store the queue.
        Tasks.do(store_bodyfetcher_queue)
def subscribe(self, post_url, callback=None, pickle=True, timeout=None):
    """Watch *post_url* for deletion (thread-safe variant).

    callback -- optional callable fired when the post is deleted
    pickle   -- persist the subscription list afterwards (debounced)
    timeout  -- seconds after which the callback is no longer honored
    """
    # Honor the global kill switch.
    if GlobalVars.no_deletion_watcher:
        return
    post_id, post_site, post_type = fetch_post_id_and_site_from_url(post_url)
    with GlobalVars.site_id_dict_lock:
        site_id = GlobalVars.site_id_dict.get(post_site, None)
    if not site_id:
        log("warning", "unknown site {} when subscribing to {}".format(post_site, post_url))
        return
    if post_type == "answer":
        # The websocket feed is per-question, so resolve the parent question.
        question_id = datahandling.get_post_site_id_link((post_id, post_site, post_type))
        if question_id is None:
            return
    else:
        question_id = post_id
    action = "{}-question-{}".format(site_id, question_id)
    max_time = (time.time() + timeout) if timeout else None
    with self.posts_lock:
        if action not in self.posts:
            self.posts[action] = (post_id, post_site, post_type, post_url,
                                  [(callback, max_time)] if callback else [])
            # Actual websocket send happens off-thread.
            Tasks.do(self._subscribe, action)
        elif callback:
            # Already watching this question; just register another callback.
            _, _, _, _, callbacks = self.posts[action]
            callbacks.append((callback, max_time))
        else:
            # Already watching and nothing new to record.
            return
    if pickle:
        self._schedule_save()
def on_msg(msg, client):
    """Handle a posted/edited chat message: dispatch commands and queue replies."""
    if not isinstance(msg, events.MessagePosted) and not isinstance(msg, events.MessageEdited):
        return
    message = msg.message
    if message.owner.id == client._br.user_id:
        # Ignore our own messages.
        return
    if message.content.startswith("<div class='partial'>"):
        # Strip the wrapper chat adds around truncated messages.
        message.content = message.content[21:]
        if message.content.endswith("</div>"):
            message.content = message.content[:-6]
    room_data = _rooms[(client.host, message.room.id)]
    if message.parent:
        # NOTE(review): other on_msg variants in this file wrap this parent
        # handling in try/except ValueError — confirm whether accessing
        # message.parent.owner can raise here too.
        if message.parent.owner.id == client._br.user_id:
            # Reply to one of our messages: strip the @mention and dispatch
            # the remainder as a reply command.
            strip_mention = regex.sub("^(<span class=(\"|')mention(\"|')>)?@.*?(</span>)? ", "",
                                      message.content)
            cmd = GlobalVars.parser.unescape(strip_mention)
            result = dispatch_reply_command(message.parent, message, cmd)
            if result:
                _msg_queue.put((room_data, ":{} {}".format(message.id, result), None))
    elif message.content.lower().startswith("sd "):
        result = dispatch_shorthand_command(message)
        if result:
            _msg_queue.put((room_data, ":{} {}".format(message.id, result), None))
    elif message.content.startswith("!!/"):
        result = dispatch_command(message)
        if result:
            _msg_queue.put((room_data, ":{} {}".format(message.id, result), None))
    elif classes.feedback.FEEDBACK_REGEX.search(message.content) \
            and is_privileged(message.owner, message.room) and datahandling.last_feedbacked:
        # Bare feedback from a privileged user applies to the last report(s).
        ids, expires_in = datahandling.last_feedbacked
        if time.time() < expires_in:
            Tasks.do(metasmoke.Metasmoke.post_auto_comment, message.content_source, message.owner, ids=ids)
def _start(self):
    """Run the edit-watcher websocket loop (intended for a daemon thread).

    Echoes heartbeats, expires subscriptions whose max_time has passed, and
    queues edited questions for re-scanning via the global body fetcher.
    """
    while True:
        msg = self.socket.recv()
        if msg:
            msg = json.loads(msg)
            action = msg["action"]
            if action == "hb":
                # Server heartbeat; echo it back to keep the socket alive.
                self.socket.send("hb")
            else:
                data = json.loads(msg["data"])
                now = time.time()
                with self.posts_lock:
                    # Default max_time == now means "unknown action", so
                    # neither the expiry nor the re-scan branch fires.
                    site_id, hostname, question_id, max_time = self.posts.get(action, (None, None, None, now))
                    if site_id and max_time <= now:
                        # Subscription expired: drop it and unsubscribe off-thread.
                        del self.posts[action]
                        Tasks.do(self._unsubscribe, action)
                if max_time > now and data["a"] == "post-edit":
                    add_to_global_bodyfetcher_queue_in_new_thread(hostname, question_id, False,
                                                                  source="EditWatcher")
def check_websocket_for_deletion(self, post_site_id, post_url, timeout):
    """Block for up to *timeout* seconds waiting for the post to be deleted.

    post_site_id -- (post_id, site, post_type) tuple
    Returns True if a matching deletion event arrived, False on timeout,
    None when the question/site cannot be resolved. Deletion stats are
    reported to metasmoke in every True/False outcome.
    """
    time_to_check = time.time() + timeout
    post_id = post_site_id[0]
    post_type = post_site_id[2]
    if post_type == "answer":
        question_id = datahandling.get_post_site_id_link(post_site_id)
        # Fix: check for None BEFORE str() — the old code did
        # str(get_post_site_id_link(...)) first, so question_id was "None"
        # (truthy) and this guard could never fire.
        if question_id is None:
            return
        question_id = str(question_id)
    else:
        question_id = post_id
    post_site = post_site_id[1]
    if post_site not in GlobalVars.site_id_dict:
        return
    site_id = GlobalVars.site_id_dict[post_site]
    ws = websocket.create_connection("wss://qa.sockets.stackexchange.com/")
    ws.send(site_id + "-question-" + question_id)
    while time.time() < time_to_check:
        # Shrink the socket timeout so we never wait past the deadline.
        ws.settimeout(time_to_check - time.time())
        try:
            a = ws.recv()
        except websocket.WebSocketTimeoutException:
            Tasks.do(metasmoke.Metasmoke.send_deletion_stats_for_post, post_url, False)
            return False
        if a is not None and a != "":
            try:
                action = json.loads(a)["action"]
                if action == "hb":
                    ws.send("hb")
                    continue
                else:
                    d = json.loads(json.loads(a)["data"])
            except Exception:  # fix: was a bare except, which also swallowed SystemExit/KeyboardInterrupt
                continue
            if d["a"] == "post-deleted" and str(d["qId"]) == question_id:
                # Answers must also match on "aId"; questions match directly.
                if (post_type == "answer" and "aId" in d and str(d["aId"]) == post_id) or post_type == "question":
                    Tasks.do(metasmoke.Metasmoke.send_deletion_stats_for_post, post_url, True)
                    return True
    Tasks.do(metasmoke.Metasmoke.send_deletion_stats_for_post, post_url, False)
    return False
def check_websocket_for_deletion(self, post_site_id, post_url, timeout):
    """Block for up to *timeout* seconds waiting for the post to be deleted.

    post_site_id -- (post_id, site, post_type) tuple
    Returns True if a matching deletion event arrived, False on timeout,
    None when the question/site cannot be resolved. Deletion stats are
    reported to metasmoke in every True/False outcome.
    """
    time_to_check = time.time() + timeout
    post_id = post_site_id[0]
    post_type = post_site_id[2]
    if post_type == "answer":
        question_id = datahandling.get_post_site_id_link(post_site_id)
        # Fix: check for None BEFORE str() — the old code did
        # str(get_post_site_id_link(...)) first, so question_id was "None"
        # (truthy) and this guard could never fire.
        if question_id is None:
            return
        question_id = str(question_id)
    else:
        question_id = post_id
    post_site = post_site_id[1]
    if post_site not in GlobalVars.site_id_dict:
        return
    site_id = GlobalVars.site_id_dict[post_site]
    ws = websocket.create_connection("wss://qa.sockets.stackexchange.com/")
    ws.send(site_id + "-question-" + question_id)
    while time.time() < time_to_check:
        # Shrink the socket timeout so we never wait past the deadline.
        ws.settimeout(time_to_check - time.time())
        try:
            a = ws.recv()
        except websocket.WebSocketTimeoutException:
            Tasks.do(metasmoke.Metasmoke.send_deletion_stats_for_post, post_url, False)
            return False
        if a is not None and a != "":
            try:
                action = json.loads(a)["action"]
                if action == "hb":
                    ws.send("hb")
                    continue
                else:
                    d = json.loads(json.loads(a)["data"])
            except Exception:  # fix: was a bare except, which also swallowed SystemExit/KeyboardInterrupt
                continue
            if d["a"] == "post-deleted" and str(d["qId"]) == question_id:
                # Answers must also match on "aId"; questions match directly.
                if (post_type == "answer" and "aId" in d and str(d["aId"]) == post_id) or post_type == "question":
                    Tasks.do(metasmoke.Metasmoke.send_deletion_stats_for_post, post_url, True)
                    return True
    Tasks.do(metasmoke.Metasmoke.send_deletion_stats_for_post, post_url, False)
    return False
def send_custom(type, url, msg):
    """Queue custom feedback for *url* on behalf of the chat message's author."""
    sender = msg.owner
    Tasks.do(Metasmoke.send_feedback_for_post, url, type, sender.name, sender.id, msg._client.host)
def _schedule_save(self):
    """Debounce pickling: cancel any pending save task and schedule a fresh one."""
    with self.save_handle_lock:
        if self.save_handle:
            self.save_handle.cancel()
        # Fix: store the new handle on self so the NEXT call can cancel it;
        # the previous code assigned to a discarded local `save_handle`,
        # which made the debouncing a no-op.
        self.save_handle = Tasks.do(self._save)
def send_custom(type, url, msg):
    """Queue custom feedback for *url* on behalf of the chat message's author."""
    sender = msg.owner
    Tasks.do(metasmoke.Metasmoke.send_feedback_for_post, url, type, sender.name, sender.id,
             msg._client.host)
def handle_spam(post, reasons, why):
    """Record a detected spam post and announce it to the chat rooms.

    post    -- the detected Post object
    reasons -- list of reason strings that matched
    why     -- human-readable explanation stored alongside the report
    """
    post_url = parsing.to_protocol_relative(parsing.url_to_shortlink(post.post_url))
    poster_url = parsing.to_protocol_relative(parsing.user_url_to_shortlink(post.user_url))
    shortened_site = post.post_site.replace("stackexchange.com", "SE")  # site.stackexchange.com -> site.SE
    datahandling.append_to_latest_questions(post.post_site, post.post_id,
                                            post.title if not post.is_answer else "")
    # Low-signal, single-reason detections are auto-ignored for a while.
    if len(reasons) == 1 and ("all-caps title" in reasons or
                              "repeating characters in title" in reasons or
                              "repeating characters in body" in reasons or
                              "repeating characters in answer" in reasons or
                              "repeating words in title" in reasons or
                              "repeating words in body" in reasons or
                              "repeating words in answer" in reasons):
        datahandling.add_auto_ignored_post((post.post_id, post.post_site, datetime.now()))
    if why is not None and why != "":
        datahandling.add_why(post.post_site, post.post_id, why)
    # Fix: `post.post_id is not ""` compared identity, not equality
    # (SyntaxWarning on CPython 3.8+, implementation-dependent result).
    if post.is_answer and post.post_id is not None and post.post_id != "":
        datahandling.add_post_site_id_link((post.post_id, post.post_site, "answer"), post.parent.post_id)
    if GlobalVars.reason_weights or GlobalVars.metasmoke_key:
        reason_weight = sum_weight(reasons)
        if reason_weight >= 1000:
            reason_weight_s = " (**{}**)".format(reason_weight)  # bold high-confidence weights
        else:
            reason_weight_s = " ({})".format(reason_weight)
    else:  # No reason weight if neither cache nor MS
        reason_weight_s = ""
    try:
        # If the post is an answer type post, the 'title' is going to be blank, so when posting the
        # message contents we need to set the post title to the *parent* title, so the message in the
        # chat is properly constructed with parent title instead.
        sanitized_title = parsing.sanitize_title(post.title if not post.is_answer else post.parent.title)
        sanitized_title = escape_format(sanitized_title).strip()
        prefix = u"[ [SmokeDetector](//git.io/vyDZv) ]"
        if GlobalVars.metasmoke_key:
            prefix_ms = u"[ [SmokeDetector](//git.io/vyDZv) | [MS]({}) ]".format(
                to_metasmoke_link(post_url, protocol=False))
        else:
            prefix_ms = prefix
        # We'll insert reason list later
        edited = '' if not post.edited else ' \u270F\uFE0F'
        if not post.user_name.strip() or (not poster_url or poster_url.strip() == ""):
            s = " {{}}{}: [{}]({}){} by a deleted user on `{}`".format(reason_weight_s, sanitized_title,
                                                                       post_url, edited, shortened_site)
            username = ""
        else:
            username = post.user_name.strip()
            escaped_username = escape_format(parsing.escape_markdown(username))
            s = " {{}}{}: [{}]({}){} by [{}]({}) on `{}`".format(reason_weight_s, sanitized_title,
                                                                 post_url, edited, escaped_username,
                                                                 poster_url, shortened_site)
        Tasks.do(metasmoke.Metasmoke.send_stats_on_post, post.title_ignore_type, post_url, reasons,
                 post.body, username, post.user_link, why, post.owner_rep, post.post_score,
                 post.up_vote_count, post.down_vote_count)
        log('debug', GlobalVars.parser.unescape(s).encode('ascii', errors='replace'))
        GlobalVars.deletion_watcher.subscribe(post_url)
        reason = message = None
        for reason_count in range(5, 0, -1):  # Try 5 reasons and all the way down to 1
            reason = ", ".join(reasons[:reason_count])
            if len(reasons) > reason_count:
                reason += ", +{} more".format(len(reasons) - reason_count)
            reason = reason.capitalize()
            message = prefix_ms + s.format(reason)  # Insert reason list
            if len(message) <= 500:
                break  # Problem solved, stop attempting
        s = s.format(reason)  # Later code needs this variable
        if len(message) > 500:
            message = (prefix_ms + s)[:500]  # Truncate directly and keep MS link
        without_roles = tuple(["no-" + reason for reason in reasons]) + ("site-no-" + post.post_site,)
        if set(reasons) - GlobalVars.experimental_reasons == set() and \
                not why.startswith("Post manually "):
            chatcommunicate.tell_rooms(message, ("experimental",), without_roles,
                                       notify_site=post.post_site, report_data=(post_url, poster_url))
        else:
            chatcommunicate.tell_rooms(message, ("all", "site-" + post.post_site), without_roles,
                                       notify_site=post.post_site, report_data=(post_url, poster_url))
    except Exception:  # fix: `as e` was unused; broad catch kept as the reporting boundary
        excepthook.uncaught_exception(*sys.exc_info())
def handle_spam(post, reasons, why):
    """Record a detected spam post and announce it to the chat rooms.

    post    -- the detected Post object
    reasons -- list of reason strings that matched
    why     -- human-readable explanation stored alongside the report
    """
    post_url = parsing.to_protocol_relative(parsing.url_to_shortlink(post.post_url))
    poster_url = parsing.to_protocol_relative(parsing.user_url_to_shortlink(post.user_url))
    shortened_site = post.post_site.replace("stackexchange.com", "SE")  # site.stackexchange.com -> site.SE
    datahandling.append_to_latest_questions(post.post_site, post.post_id,
                                            post.title if not post.is_answer else "")
    # Low-signal, single-reason detections are auto-ignored for a while.
    if len(reasons) == 1 and ("all-caps title" in reasons or
                              "repeating characters in title" in reasons or
                              "repeating characters in body" in reasons or
                              "repeating characters in answer" in reasons or
                              "repeating words in title" in reasons or
                              "repeating words in body" in reasons or
                              "repeating words in answer" in reasons):
        datahandling.add_auto_ignored_post((post.post_id, post.post_site, datetime.now()))
    if why is not None and why != "":
        datahandling.add_why(post.post_site, post.post_id, why)
    # Fix: `post.post_id is not ""` compared identity, not equality
    # (SyntaxWarning on CPython 3.8+, implementation-dependent result).
    if post.is_answer and post.post_id is not None and post.post_id != "":
        datahandling.add_post_site_id_link((post.post_id, post.post_site, "answer"), post.parent.post_id)
    if GlobalVars.reason_weights or GlobalVars.metasmoke_key:
        reason_weight = sum_weight(reasons)
        if reason_weight >= 1000:
            reason_weight_s = " (**{:,}**)".format(reason_weight)  # bold high-confidence weights
        else:
            reason_weight_s = " ({:,})".format(reason_weight)
    else:  # No reason weight if neither cache nor MS
        reason_weight_s = ""
    try:
        # If the post is an answer type post, the 'title' is going to be blank, so when posting the
        # message contents we need to set the post title to the *parent* title, so the message in the
        # chat is properly constructed with parent title instead.
        sanitized_title = parsing.sanitize_title(post.title if not post.is_answer else post.parent.title)
        sanitized_title = escape_format(sanitized_title).strip()
        prefix = u"[ [SmokeDetector](//goo.gl/eLDYqh) ]"
        if GlobalVars.metasmoke_key:
            prefix_ms = u"[ [SmokeDetector](//goo.gl/eLDYqh) | [MS]({}) ]".format(
                to_metasmoke_link(post_url, protocol=False))
        else:
            prefix_ms = prefix
        # We'll insert reason list later
        edited = '' if not post.edited else ' \u270F\uFE0F'
        if not post.user_name.strip() or (not poster_url or poster_url.strip() == ""):
            s = " {{}}{}: [{}]({}){} by a deleted user on `{}`".format(reason_weight_s, sanitized_title,
                                                                       post_url, edited, shortened_site)
            username = ""
        else:
            username = post.user_name.strip()
            escaped_username = escape_format(parsing.escape_markdown(username))
            s = " {{}}{}: [{}]({}){} by [{}]({}) on `{}`".format(reason_weight_s, sanitized_title,
                                                                 post_url, edited, escaped_username,
                                                                 poster_url, shortened_site)
        Tasks.do(metasmoke.Metasmoke.send_stats_on_post, post.title_ignore_type, post_url, reasons,
                 post.body, username, post.user_link, why, post.owner_rep, post.post_score,
                 post.up_vote_count, post.down_vote_count)
        log('debug', GlobalVars.parser.unescape(s).encode('ascii', errors='replace'))
        GlobalVars.deletion_watcher.subscribe(post_url)
        reason = message = None
        for reason_count in range(5, 0, -1):  # Try 5 reasons and all the way down to 1
            reason = ", ".join(reasons[:reason_count])
            if len(reasons) > reason_count:
                reason += ", +{} more".format(len(reasons) - reason_count)
            reason = reason.capitalize()
            message = prefix_ms + s.format(reason)  # Insert reason list
            if len(message) <= 500:
                break  # Problem solved, stop attempting
        s = s.format(reason)  # Later code needs this variable
        if len(message) > 500:
            message = (prefix_ms + s)[:500]  # Truncate directly and keep MS link
        without_roles = tuple(["no-" + reason for reason in reasons]) + ("site-no-" + post.post_site,)
        if set(reasons) - GlobalVars.experimental_reasons == set() and \
                not why.startswith("Post manually "):
            chatcommunicate.tell_rooms(message, ("experimental",), without_roles,
                                       notify_site=post.post_site, report_data=(post_url, poster_url))
        else:
            chatcommunicate.tell_rooms(message, ("all", "site-" + post.post_site), without_roles,
                                       notify_site=post.post_site, report_data=(post_url, poster_url))
    except Exception:  # fix: `as e` was unused; broad catch kept as the reporting boundary
        excepthook.uncaught_exception(*sys.exc_info())
def stopflagging():
    """Queue an asynchronous request to turn off metasmoke autoflagging.

    Returns immediately with an acknowledgement string for the chat reply;
    the actual stop request runs later on the Tasks worker.
    """
    ack = "Request sent..."
    # Fire-and-forget: do not block the chat command on the metasmoke call.
    Tasks.do(Metasmoke.stop_autoflagging)
    return ack
def handle_spam(post, reasons, why):
    """Process a post that the detectors flagged as spam.

    Records bookkeeping data (latest questions, auto-ignore list, "why" data,
    answer->question links), queues stats for metasmoke, subscribes the
    deletion watcher, then builds and posts a chat report message that fits
    within the 500-character chat limit.

    :param post: the flagged Post object
    :param reasons: list of lower-case reason strings that matched
    :param why: free-form explanation text of why the reasons matched
    """
    post_url = parsing.to_protocol_relative(parsing.url_to_shortlink(post.post_url))
    poster_url = parsing.to_protocol_relative(parsing.user_url_to_shortlink(post.user_url))
    shortened_site = post.post_site.replace("stackexchange.com", "SE")  # site.stackexchange.com -> site.SE
    datahandling.append_to_latest_questions(post.post_site, post.post_id,
                                            post.title if not post.is_answer else "")

    # Low-signal reasons that fire on their own are auto-ignored to reduce noise.
    # With exactly one reason, "x in reasons" is equivalent to "reasons[0] == x".
    noisy_single_reasons = {"all-caps title",
                            "repeating characters in title",
                            "repeating characters in body",
                            "repeating characters in answer",
                            "repeating words in title",
                            "repeating words in body",
                            "repeating words in answer"}
    if len(reasons) == 1 and reasons[0] in noisy_single_reasons:
        datahandling.add_auto_ignored_post((post.post_id, post.post_site, datetime.now()))

    if why is not None and why != "":
        datahandling.add_why(post.post_site, post.post_id, why)

    # Fix: was `post.post_id is not ""` — identity comparison against a string
    # literal (SyntaxWarning since CPython 3.8, depends on interning); use !=.
    if post.is_answer and post.post_id is not None and post.post_id != "":
        datahandling.add_post_site_id_link((post.post_id, post.post_site, "answer"), post.parent.post_id)

    try:
        # If the post is an answer type post, the 'title' is going to be blank, so when posting the
        # message contents we need to set the post title to the *parent* title, so the message in the
        # chat is properly constructed with parent title instead. This will make things 'print'
        # in a proper way in chat messages.
        sanitized_title = parsing.sanitize_title(post.title if not post.is_answer else post.parent.title)

        prefix = u"[ [SmokeDetector](//goo.gl/eLDYqh) ]"
        if GlobalVars.metasmoke_key:
            prefix_ms = u"[ [SmokeDetector](//goo.gl/eLDYqh) | [MS](//m.erwaysoftware.com/posts/uid/{}/{}) ]".format(
                api_parameter_from_link(post_url), post.post_id)
        else:
            prefix_ms = prefix

        # Build the message skeleton; the reason list is inserted later via
        # the escaped `{{}}` placeholder.
        if not post.user_name.strip() or (not poster_url or poster_url.strip() == ""):
            s = u" {{}}: [{}]({}) by a deleted user on `{}`".format(sanitized_title, post_url,
                                                                    shortened_site)
            username = ""
        else:
            s = u" {{}}: [{}]({}) by [{}]({}) on `{}`".format(sanitized_title, post_url,
                                                              post.user_name.strip(), poster_url,
                                                              shortened_site)
            username = post.user_name.strip()

        Tasks.do(metasmoke.Metasmoke.send_stats_on_post, post.title_ignore_type, post_url, reasons,
                 post.body, username, post.user_link, why, post.owner_rep, post.post_score,
                 post.up_vote_count, post.down_vote_count)

        log('debug', GlobalVars.parser.unescape(s).encode('ascii', errors='replace'))
        GlobalVars.deletion_watcher.subscribe(post_url)

        # Try progressively fewer reasons until the message fits in chat's
        # 500-character limit.
        reason = message = None
        for reason_count in range(5, 2, -1):  # Try 5 reasons, then 4, then 3
            reason = ", ".join(reasons[:reason_count])
            if len(reasons) > reason_count:
                reason += ", +{} more".format(len(reasons) - reason_count)
            # Capitalise only the first character (str.capitalize would lower the rest).
            reason = reason[:1].upper() + reason[1:]
            message = prefix_ms + s.format(reason)  # Insert reason list
            if len(message) <= 500:
                break  # Problem solved, stop attempting
        s = s.format(reason)  # Later code needs this variable

        if len(message) > 500:
            message = (prefix_ms + s)[:500]  # Truncate directly and keep MS link

        without_roles = tuple("no-" + reason for reason in reasons) + ("site-no-" + post.post_site, )

        # If every matched reason is experimental, route only to the experimental room.
        if set(reasons) - GlobalVars.experimental_reasons == set():
            chatcommunicate.tell_rooms(message, ("experimental", ), without_roles,
                                       notify_site=post.post_site,
                                       report_data=(post_url, poster_url))
        else:
            chatcommunicate.tell_rooms(message, ("all", "site-" + post.post_site), without_roles,
                                       notify_site=post.post_site,
                                       report_data=(post_url, poster_url))
    except Exception:
        # Fix: bare `except:` also trapped SystemExit/KeyboardInterrupt.
        excepthook.uncaught_exception(*sys.exc_info())
def make_api_call_for_site(self, site):
    """Drain the queued post IDs for `site`, fetch the posts from the SE API,
    and scan each question (and its answers) for spam.

    Handles API quota accounting, backoff, and re-queues the posts if the
    HTTP request fails. Thread-safety relies on self.queue_lock,
    self.max_ids_modify_lock, self.last_activity_date_lock, self.api_data_lock
    and GlobalVars.api_request_lock / apiquota_rw_lock.
    """
    # Atomically take (and remove) this site's pending posts from the queue.
    with self.queue_lock:
        new_posts = self.queue.pop(site, None)
    if new_posts is None:
        # site was not in the queue
        return
    Tasks.do(store_bodyfetcher_queue)

    # new_posts maps post-id strings -> enqueue timestamps.
    new_post_ids = [int(k) for k in new_posts.keys()]

    if GlobalVars.flovis is not None:
        for post_id in new_post_ids:
            GlobalVars.flovis.stage('bodyfetcher/api_request', site, post_id,
                                    {'site': site, 'posts': list(new_posts.keys())})

    # Add queue timing data
    pop_time = datetime.utcnow()
    post_add_times = [(pop_time - v).total_seconds() for k, v in new_posts.items()]
    Tasks.do(add_queue_timing_data, site, post_add_times)

    # Hybrid fetch: also request IDs between the previously-seen max and the
    # current max, in case the realtime feed skipped some posts.
    store_max_ids = False
    with self.max_ids_modify_lock:
        if site in self.previous_max_ids and max(new_post_ids) > self.previous_max_ids[site]:
            previous_max_id = self.previous_max_ids[site]
            intermediate_posts = range(previous_max_id + 1, max(new_post_ids))

            # We don't want to go over the 100-post API cutoff, so take the last
            # (100-len(new_post_ids)) from intermediate_posts

            intermediate_posts = intermediate_posts[-(100 - len(new_post_ids)):]

            # new_post_ids could contain edited posts, so merge it back in
            combined = chain(intermediate_posts, new_post_ids)

            # Could be duplicates, so uniquify
            posts = list(set(combined))
        else:
            posts = new_post_ids

        new_post_ids_max = max(new_post_ids)
        if new_post_ids_max > self.previous_max_ids.get(site, 0):
            self.previous_max_ids[site] = new_post_ids_max
            store_max_ids = True

    # Persist outside the lock; the save itself is debounced elsewhere.
    if store_max_ids:
        schedule_store_bodyfetcher_max_ids()

    log('debug', "New IDs / Hybrid Intermediate IDs for {}:".format(site))
    if len(new_post_ids) > 30:
        log('debug', "{} +{} more".format(sorted(new_post_ids)[:30], len(new_post_ids) - 30))
    else:
        log('debug', sorted(new_post_ids))
    if len(new_post_ids) == len(posts):
        log('debug', "[ *Identical* ]")
    elif len(posts) > 30:
        log('debug', "{} +{} more".format(sorted(posts)[:30], len(posts) - 30))
    else:
        log('debug', sorted(posts))

    question_modifier = ""
    pagesize_modifier = {}

    if site == "stackoverflow.com":
        # Not all SO questions are shown in the realtime feed. We now
        # fetch all recently modified SO questions to work around that.
        with self.last_activity_date_lock:
            if self.last_activity_date != 0:
                pagesize = "100"
            else:
                pagesize = "50"

            # 'min' widens the activity window slightly earlier than the last
            # seen activity date to avoid missing posts near the boundary.
            pagesize_modifier = {
                'pagesize': pagesize,
                'min': str(self.last_activity_date - self.ACTIVITY_DATE_EXTRA_EARLIER_MS_TO_FETCH)
            }
    else:
        # Non-SO sites: request the explicit ID list in the URL path.
        question_modifier = "/{0}".format(";".join([str(post) for post in posts]))

    url = "https://api.stackexchange.com/2.2/questions{}".format(question_modifier)
    params = {
        'filter': '!1rs)sUKylwB)8isvCRk.xNu71LnaxjnPS12*pX*CEOKbPFwVFdHNxiMa7GIVgzDAwMa',
        'key': 'IAkbitmze4B8KpacUfLqkw((',
        'site': site
    }
    params.update(pagesize_modifier)

    # wait to make sure API has/updates post data
    time.sleep(3)

    with GlobalVars.api_request_lock:
        # Respect backoff, if we were given one
        if GlobalVars.api_backoff_time > time.time():
            time.sleep(GlobalVars.api_backoff_time - time.time() + 2)

        try:
            time_request_made = datetime.utcnow().strftime('%H:%M:%S')
            response = requests.get(url, params=params, timeout=20).json()
        except (requests.exceptions.Timeout, requests.ConnectionError, Exception):
            # Any failure in the request being made (timeout or otherwise) should be added back to
            # the queue.
            with self.queue_lock:
                if site in self.queue:
                    self.queue[site].update(new_posts)
                else:
                    self.queue[site] = new_posts
            return

        with self.api_data_lock:
            add_or_update_api_data(site)

        message_hq = ""
        with GlobalVars.apiquota_rw_lock:
            if "quota_remaining" in response:
                quota_remaining = response["quota_remaining"]
                # A large jump upward in quota means the daily quota rolled over;
                # report per-site usage and reset the counters.
                if quota_remaining - GlobalVars.apiquota >= 5000 and GlobalVars.apiquota >= 0 \
                        and quota_remaining > 39980:
                    tell_rooms_with("debug",
                                    "API quota rolled over with {0} requests remaining. "
                                    "Current quota: {1}.".format(GlobalVars.apiquota, quota_remaining))

                    sorted_calls_per_site = sorted(GlobalVars.api_calls_per_site.items(), key=itemgetter(1),
                                                   reverse=True)
                    api_quota_used_per_site = ""
                    for site_name, quota_used in sorted_calls_per_site:
                        sanatized_site_name = site_name.replace('.com', '').replace('.stackexchange', '')
                        api_quota_used_per_site += sanatized_site_name + ": {0}\n".format(str(quota_used))
                    api_quota_used_per_site = api_quota_used_per_site.strip()

                    tell_rooms_with("debug", api_quota_used_per_site)
                    clear_api_data()
                if quota_remaining == 0:
                    tell_rooms_with("debug", "API reports no quota left!  May be a glitch.")
                    tell_rooms_with("debug", str(response))  # No code format for now?
                if GlobalVars.apiquota == -1:
                    # -1 is the sentinel for "just restarted, quota unknown".
                    tell_rooms_with("debug", "Restart: API quota is {quota}.".format(quota=quota_remaining))
                GlobalVars.apiquota = quota_remaining
            else:
                message_hq = "The quota_remaining property was not in the API response."

        if "error_message" in response:
            message_hq += " Error: {} at {} UTC.".format(response["error_message"], time_request_made)
            if "error_id" in response and response["error_id"] == 502:
                if GlobalVars.api_backoff_time < time.time() + 12:  # Add a backoff of 10 + 2 seconds as a default
                    GlobalVars.api_backoff_time = time.time() + 12
                    message_hq += " Backing off on requests for the next 12 seconds."
            message_hq += " Previous URL: `{}`".format(url)

        if "backoff" in response:
            # Only extend the backoff window, never shorten it.
            if GlobalVars.api_backoff_time < time.time() + response["backoff"]:
                GlobalVars.api_backoff_time = time.time() + response["backoff"]

    if len(message_hq) > 0 and "site is required" not in message_hq:
        message_hq = message_hq.strip()
        if len(message_hq) > 500:
            message_hq = "\n" + message_hq
        tell_rooms_with("debug", message_hq)

    if "items" not in response:
        return

    if site == "stackoverflow.com":
        # Remember the newest activity timestamp so the next SO fetch can use
        # it as the 'min' activity-window bound.
        items = response["items"]
        if len(items) > 0 and "last_activity_date" in items[0]:
            with self.last_activity_date_lock:
                self.last_activity_date = items[0]["last_activity_date"]

    num_scanned = 0
    start_time = time.time()

    for post in response["items"]:
        if GlobalVars.flovis is not None:
            # Redacted copy for flovis logging (body truncated, answers dropped).
            pnb = copy.deepcopy(post)
            if 'body' in pnb:
                pnb['body'] = 'Present, but truncated'
            if 'answers' in pnb:
                del pnb['answers']

        if "title" not in post or "body" not in post:
            if GlobalVars.flovis is not None and 'question_id' in post:
                GlobalVars.flovis.stage('bodyfetcher/api_response/no_content', site, post['question_id'], pnb)
            continue

        post['site'] = site
        try:
            post['edited'] = (post['creation_date'] != post['last_edit_date'])
        except KeyError:
            post['edited'] = False  # last_edit_date not present = not edited

        # NOTE: add_recently_scanned_post must run even when the scan is
        # skipped, so the "recently scanned" record stays fresh.
        question_doesnt_need_scan = is_post_recently_scanned_and_unchanged(post)
        add_recently_scanned_post(post)
        if not question_doesnt_need_scan:
            try:
                post_ = Post(api_response=post)
            except PostParseError as err:
                log('error', 'Error {0} when parsing post: {1!r}'.format(err, post_))
                if GlobalVars.flovis is not None and 'question_id' in post:
                    GlobalVars.flovis.stage('bodyfetcher/api_response/error', site, post['question_id'], pnb)
                continue

            num_scanned += 1

            is_spam, reason, why = check_if_spam(post_)

            if is_spam:
                try:
                    if GlobalVars.flovis is not None and 'question_id' in post:
                        GlobalVars.flovis.stage('bodyfetcher/api_response/spam', site, post['question_id'],
                                                {'post': pnb, 'check_if_spam': [is_spam, reason, why]})
                    handle_spam(post=post_, reasons=reason, why=why)
                except Exception as e:
                    log('error', "Exception in handle_spam:", e)
            elif GlobalVars.flovis is not None and 'question_id' in post:
                GlobalVars.flovis.stage('bodyfetcher/api_response/not_spam', site, post['question_id'],
                                        {'post': pnb, 'check_if_spam': [is_spam, reason, why]})

        # Answers are scanned even when the parent question didn't need a scan.
        try:
            if "answers" not in post:
                pass
            else:
                for answer in post["answers"]:
                    if GlobalVars.flovis is not None:
                        anb = copy.deepcopy(answer)
                        if 'body' in anb:
                            anb['body'] = 'Present, but truncated'

                    num_scanned += 1
                    answer["IsAnswer"] = True  # Necesssary for Post object
                    answer["title"] = ""  # Necessary for proper Post object creation
                    answer["site"] = site  # Necessary for proper Post object creation
                    try:
                        answer['edited'] = (answer['creation_date'] != answer['last_edit_date'])
                    except KeyError:
                        answer['edited'] = False  # last_edit_date not present = not edited
                    answer_doesnt_need_scan = is_post_recently_scanned_and_unchanged(answer)
                    add_recently_scanned_post(answer)
                    if answer_doesnt_need_scan:
                        continue

                    answer_ = Post(api_response=answer, parent=post_)

                    is_spam, reason, why = check_if_spam(answer_)
                    if is_spam:
                        try:
                            if GlobalVars.flovis is not None and 'answer_id' in answer:
                                GlobalVars.flovis.stage('bodyfetcher/api_response/spam', site,
                                                        answer['answer_id'],
                                                        {'post': anb, 'check_if_spam': [is_spam, reason, why]})
                            handle_spam(answer_, reasons=reason, why=why)
                        except Exception as e:
                            log('error', "Exception in handle_spam:", e)
                    elif GlobalVars.flovis is not None and 'answer_id' in answer:
                        GlobalVars.flovis.stage('bodyfetcher/api_response/not_spam', site, answer['answer_id'],
                                                {'post': anb, 'check_if_spam': [is_spam, reason, why]})

        except Exception as e:
            log('error', "Exception handling answers:", e)

    end_time = time.time()
    scan_time = end_time - start_time
    # Record aggregate scan throughput for stats reporting.
    GlobalVars.PostScanStat.add_stat(num_scanned, scan_time)
    return
def handle_spam(post, reasons, why):
    """Process a post flagged as spam: record bookkeeping data, queue stats
    for metasmoke, and post a report message to the chat rooms.

    :param post: the flagged Post object
    :param reasons: list of lower-case reason strings that matched
    :param why: free-form explanation text of why the reasons matched
    """
    post_url = parsing.to_protocol_relative(parsing.url_to_shortlink(post.post_url))
    poster_url = parsing.to_protocol_relative(parsing.user_url_to_shortlink(post.user_url))

    # Show at most five reasons in the chat message.
    reason = ", ".join(reasons[:5])
    if len(reasons) > 5:
        reason += ", +{} more".format(len(reasons) - 5)
    # Capitalise only the first character (str.capitalize would lower the rest).
    reason = reason[:1].upper() + reason[1:]

    shortened_site = post.post_site.replace("stackexchange.com", "SE")  # site.stackexchange.com -> site.SE
    datahandling.append_to_latest_questions(post.post_site, post.post_id,
                                            post.title if not post.is_answer else "")

    # Low-signal reasons that fire on their own are auto-ignored to reduce noise.
    # With exactly one reason, "x in reasons" is equivalent to "reasons[0] == x".
    noisy_single_reasons = {"all-caps title",
                            "repeating characters in title",
                            "repeating characters in body",
                            "repeating characters in answer",
                            "repeating words in title",
                            "repeating words in body",
                            "repeating words in answer"}
    if len(reasons) == 1 and reasons[0] in noisy_single_reasons:
        datahandling.add_auto_ignored_post((post.post_id, post.post_site, datetime.now()))

    if why is not None and why != "":
        datahandling.add_why(post.post_site, post.post_id, why)

    # Fix: was `post.post_id is not ""` — identity comparison against a string
    # literal (SyntaxWarning since CPython 3.8); use !=.
    if post.is_answer and post.post_id is not None and post.post_id != "":
        datahandling.add_post_site_id_link((post.post_id, post.post_site, "answer"), post.parent.post_id)

    try:
        post._title = parsing.escape_special_chars_in_title(post.title)
        if post.is_answer:
            # If the post is an answer type post, the 'title' is going to be blank, so when posting the
            # message contents we need to set the post title to the *parent* title, so the message in the
            # chat is properly constructed with parent title instead. This will make things 'print'
            # in a proper way in chat messages.
            sanitized_title = regex.sub('(https?://|\n)', '', post.parent.title)
        else:
            sanitized_title = regex.sub('(https?://|\n)', '', post.title)
        # Escape markdown metacharacters; newline becomes a visible return symbol.
        sanitized_title = regex.sub(r'([\]*`])', r'\\\1', sanitized_title).replace('\n', u'\u23CE')

        prefix = u"[ [SmokeDetector](//goo.gl/eLDYqh) ]"
        if GlobalVars.metasmoke_key:
            prefix_ms = u"[ [SmokeDetector](//goo.gl/eLDYqh) | [MS](//m.erwaysoftware.com/posts/by-url?url=" + \
                post_url + ") ]"
        else:
            prefix_ms = prefix

        if not post.user_name.strip() or (not poster_url or poster_url.strip() == ""):
            s = u" {}: [{}]({}) by a deleted user on `{}`".format(reason, sanitized_title.strip(),
                                                                  post_url, shortened_site)
            username = ""
        else:
            s = u" {}: [{}]({}) by [{}]({}) on `{}`".format(reason, sanitized_title.strip(), post_url,
                                                            post.user_name.strip(), poster_url,
                                                            shortened_site)
            username = post.user_name.strip()

        Tasks.do(metasmoke.Metasmoke.send_stats_on_post, post.title_ignore_type, post_url, reasons,
                 post.body, username, post.user_link, why, post.owner_rep, post.post_score,
                 post.up_vote_count, post.down_vote_count)

        log('debug', GlobalVars.parser.unescape(s).encode('ascii', errors='replace'))
        datahandling.append_to_latest_questions(post.post_site, post.post_id, post.title)

        message = prefix_ms + s
        if len(message) > 500:
            # Drop the MS link to make room when truncating.
            message = (prefix + s)[:500]

        without_roles = tuple("no-" + r for r in reasons)

        # Fix: was `set(reason) & GlobalVars.experimental_reasons == {}` — that
        # built a set of the *characters* of the reason string and compared a
        # set with a dict literal (always False), so experimental-only reports
        # never reached the experimental room. The intended check is "every
        # matched reason is experimental". Also `("experimental")` was a plain
        # string, not a one-element tuple.
        if set(reasons) - GlobalVars.experimental_reasons == set():
            chatcommunicate.tell_rooms(message, ("experimental", ), without_roles,
                                       notify_site=post.post_site,
                                       report_data=(post_url, poster_url))
        else:
            chatcommunicate.tell_rooms(message, ("all", "site-" + post.post_site), without_roles,
                                       notify_site=post.post_site,
                                       report_data=(post_url, poster_url))
    except Exception:
        # Fix: bare `except:` also trapped SystemExit/KeyboardInterrupt.
        excepthook.uncaught_exception(*sys.exc_info())
def handle_spam(post, reasons, why):
    """Process a post flagged as spam: record bookkeeping data, queue stats
    for metasmoke, and post a report message to the chat rooms.

    :param post: the flagged Post object
    :param reasons: list of lower-case reason strings that matched
    :param why: free-form explanation text of why the reasons matched
    """
    post_url = parsing.to_protocol_relative(parsing.url_to_shortlink(post.post_url))
    poster_url = parsing.to_protocol_relative(parsing.user_url_to_shortlink(post.user_url))

    # Show at most five reasons in the chat message.
    reason = ", ".join(reasons[:5])
    if len(reasons) > 5:
        reason += ", +{} more".format(len(reasons) - 5)
    # Capitalise only the first character (str.capitalize would lower the rest).
    reason = reason[:1].upper() + reason[1:]

    shortened_site = post.post_site.replace("stackexchange.com", "SE")  # site.stackexchange.com -> site.SE
    datahandling.append_to_latest_questions(post.post_site, post.post_id,
                                            post.title if not post.is_answer else "")

    # Low-signal reasons that fire on their own are auto-ignored to reduce noise.
    # With exactly one reason, "x in reasons" is equivalent to "reasons[0] == x".
    noisy_single_reasons = {"all-caps title",
                            "repeating characters in title",
                            "repeating characters in body",
                            "repeating characters in answer",
                            "repeating words in title",
                            "repeating words in body",
                            "repeating words in answer"}
    if len(reasons) == 1 and reasons[0] in noisy_single_reasons:
        datahandling.add_auto_ignored_post((post.post_id, post.post_site, datetime.now()))

    if why is not None and why != "":
        datahandling.add_why(post.post_site, post.post_id, why)

    # Fix: was `post.post_id is not ""` — identity comparison against a string
    # literal (SyntaxWarning since CPython 3.8); use !=.
    if post.is_answer and post.post_id is not None and post.post_id != "":
        datahandling.add_post_site_id_link((post.post_id, post.post_site, "answer"), post.parent.post_id)

    try:
        post._title = parsing.escape_special_chars_in_title(post.title)
        if post.is_answer:
            # If the post is an answer type post, the 'title' is going to be blank, so when posting the
            # message contents we need to set the post title to the *parent* title, so the message in the
            # chat is properly constructed with parent title instead. This will make things 'print'
            # in a proper way in chat messages.
            sanitized_title = regex.sub('(https?://|\n)', '', post.parent.title)
        else:
            sanitized_title = regex.sub('(https?://|\n)', '', post.title)
        # Escape markdown metacharacters; newline becomes a visible return symbol.
        sanitized_title = regex.sub(r'([\]*`])', r'\\\1', sanitized_title).replace('\n', u'\u23CE')

        prefix = u"[ [SmokeDetector](//goo.gl/eLDYqh) ]"
        if GlobalVars.metasmoke_key:
            prefix_ms = u"[ [SmokeDetector](//goo.gl/eLDYqh) | [MS](//m.erwaysoftware.com/posts/by-url?url=" + \
                post_url + ") ]"
        else:
            prefix_ms = prefix

        if not post.user_name.strip() or (not poster_url or poster_url.strip() == ""):
            s = u" {}: [{}]({}) by a deleted user on `{}`".format(reason, sanitized_title.strip(),
                                                                  post_url, shortened_site)
            username = ""
        else:
            s = u" {}: [{}]({}) by [{}]({}) on `{}`".format(reason, sanitized_title.strip(), post_url,
                                                            post.user_name.strip(), poster_url,
                                                            shortened_site)
            username = post.user_name.strip()

        Tasks.do(metasmoke.Metasmoke.send_stats_on_post, post.title_ignore_type, post_url, reasons,
                 post.body, username, post.user_link, why, post.owner_rep, post.post_score,
                 post.up_vote_count, post.down_vote_count)

        log('debug', GlobalVars.parser.unescape(s).encode('ascii', errors='replace'))
        datahandling.append_to_latest_questions(post.post_site, post.post_id, post.title)

        message = prefix_ms + s
        if len(message) > 500:
            # Drop the MS link to make room when truncating.
            message = (prefix + s)[:500]

        without_roles = tuple("no-" + r for r in reasons) + ("site-no-" + post.post_site, )

        # Fix: was `set(reason) & GlobalVars.experimental_reasons == {}` — that
        # built a set of the *characters* of the reason string and compared a
        # set with a dict literal (always False), so experimental-only reports
        # never reached the experimental room. The intended check is "every
        # matched reason is experimental". Also `("experimental")` was a plain
        # string, not a one-element tuple.
        if set(reasons) - GlobalVars.experimental_reasons == set():
            chatcommunicate.tell_rooms(message, ("experimental", ), without_roles,
                                       notify_site=post.post_site,
                                       report_data=(post_url, poster_url))
        else:
            chatcommunicate.tell_rooms(message, ("all", "site-" + post.post_site), without_roles,
                                       notify_site=post.post_site,
                                       report_data=(post_url, poster_url))
    except Exception:
        # Fix: bare `except:` also trapped SystemExit/KeyboardInterrupt.
        excepthook.uncaught_exception(*sys.exc_info())
def schedule_store_bodyfetcher_max_ids():
    """Debounce persisting the bodyfetcher max IDs.

    Cancels any still-pending save task under the handle lock and schedules
    a fresh one, so rapid successive calls result in a single store.
    """
    global bodyfetcher_max_ids_save_handle
    with bodyfetcher_max_ids_save_handle_lock:
        pending = bodyfetcher_max_ids_save_handle
        if pending:
            pending.cancel()  # supersede the not-yet-run save
        bodyfetcher_max_ids_save_handle = Tasks.do(store_bodyfetcher_max_ids)