def dispatch_reply_command(msg, reply, full_cmd, comment=True):
    """Run the reply command named by the first word of ``full_cmd``.

    ``full_cmd`` is split at its first space into a command word and an
    argument string. The command word is lower-cased, a trailing "-" marks
    the action as quiet, and trailing non-word characters are stripped
    before the lookup in ``_reply_commands``.

    :param msg: the message that was replied to; passed to the command as
                its first positional argument
    :param reply: the replying message; passed as ``original_msg``
    :param full_cmd: the command text
    :param comment: when true and the text is not a known command, the text
                    is submitted as an auto comment if the reply's owner is
                    privileged in the reply's room and report data exists
                    for ``msg``
    :return: the command function's return value, or None when no command
             was run
    """
    cmd, _, args = full_cmd.partition(" ")
    cmd = cmd.lower()

    # endswith() rather than cmd[-1]: the command word is empty when the text
    # is empty or begins with a space, and indexing it would raise IndexError.
    quiet_action = cmd.endswith("-")
    cmd = regex.sub(r"\W*$", "", cmd)

    if cmd in _reply_commands:
        func, (min_arity, max_arity) = _reply_commands[cmd]

        assert min_arity == 1

        if max_arity == 1:
            return func(msg, original_msg=reply, alias_used=cmd, quiet_action=quiet_action)
        elif max_arity == 2:
            # The whole argument string is handed over unsplit.
            return func(msg, args, original_msg=reply, alias_used=cmd, quiet_action=quiet_action)
        else:
            # NOTE(review): msg counts towards the arity in the two branches
            # above, yet here the argument list alone is padded to max_arity.
            # Confirm against the registered commands before relying on it.
            args = args.split()
            args.extend([None] * (max_arity - len(args)))

            return func(msg, *args, original_msg=reply, alias_used=cmd, quiet_action=quiet_action)
    elif comment and is_privileged(reply.owner, reply.room):
        post_data = get_report_data(msg)

        if post_data:
            Tasks.do(metasmoke.Metasmoke.post_auto_comment, full_cmd, reply.owner, url=post_data[0])
    def _start(self):
        """Receive websocket messages forever and handle "post-deleted" events.

        A message whose action is "hb" is answered with "hb". For any other
        message the JSON in its "data" field is decoded; when its "a" field
        is "post-deleted" and the action is a key of ``self.posts``, the
        watched post is treated as deleted if it is not an answer, or if the
        event's "aId" equals the watched answer's ID. In that case the entry
        is removed, "-" + action is sent on the socket, deletion stats are
        queued for the post URL, and every callback whose time limit is
        unset or not yet reached is called.
        """
        while True:
            msg = self.socket.recv()

            if msg:
                msg = json.loads(msg)
                action = msg["action"]

                if action == "hb":
                    self.socket.send("hb")
                else:
                    data = json.loads(msg["data"])

                    if data["a"] == "post-deleted":
                        try:
                            post_id, _, post_type, post_url, callbacks = self.posts[
                                action]

                            if not post_type == "answer" or (
                                    "aId" in data
                                    and str(data["aId"]) == post_id):
                                # Forget the entry only once the watched post
                                # itself is gone. Removing it for any deletion
                                # event on the question would make the later
                                # event for the watched answer hit KeyError
                                # and be silently ignored.
                                del self.posts[action]
                                self.socket.send("-" + action)
                                Tasks.do(
                                    metasmoke.Metasmoke.
                                    send_deletion_stats_for_post, post_url,
                                    True)

                                for callback, max_time in callbacks:
                                    if not max_time or time.time() < max_time:
                                        callback()
                        except KeyError:
                            # The action is not in self.posts: nothing to do.
                            pass
    def _start(self):
        """Receive websocket messages forever and handle "post-deleted" events.

        A message whose action is "hb" is answered with "hb". For a
        "post-deleted" event on an action stored in ``self.posts``, the
        stored post ID is compared with the event's "aId" (or "qId" when no
        "aId" is present). On a match the entry is removed, an unsubscribe
        task and a deletion-stats task are queued, and every callback whose
        time limit is unset or not yet reached is called.
        """
        while True:
            raw = self.socket.recv()
            if not raw:
                continue

            frame = json.loads(raw)
            action = frame["action"]

            if action == "hb":
                self.socket.send("hb")
                continue

            data = json.loads(frame["data"])
            if data["a"] != "post-deleted":
                continue

            try:
                with self.posts_lock:
                    post_id, _, _, post_url, callbacks = self.posts[action]

                deleted_id = data["aId"] if "aId" in data else data["qId"]
                if post_id != str(deleted_id):
                    continue

                with self.posts_lock:
                    del self.posts[action]
                Tasks.do(self._unsubscribe, action)
                Tasks.do(metasmoke.Metasmoke.send_deletion_stats_for_post,
                         post_url, True)

                for callback, max_time in callbacks:
                    if not max_time or time.time() < max_time:
                        callback()
            except KeyError:
                # Unknown action, or the event lacks the expected ID field.
                pass
Beispiel #4
0
    def __init__(self):
        """Set up the watcher state, connect the websocket and start the reader thread.

        When ``GlobalVars.no_edit_watcher`` is set, only ``self.socket`` is
        initialised (to None) and nothing else happens. Otherwise unexpired
        entries from the pickle file, if one exists, are restored and a
        subscription task is queued for each of them.
        """
        if GlobalVars.no_edit_watcher:
            self.socket = None
            return

        # posts maps each WebSocket action, "{site_id}-question-{question_id}",
        # to a tuple: (site_id, hostname, question_id, max_time)
        self.posts = {}
        self.posts_lock = threading.Lock()
        self.save_handle = None
        self.save_handle_lock = threading.Lock()

        try:
            self.socket = websocket.create_connection("wss://qa.sockets.stackexchange.com/")
        except websocket.WebSocketException:
            self.socket = None
            log('error', 'EditWatcher failed to create a websocket connection')

        if datahandling.has_pickle(PICKLE_FILENAME):
            stored = datahandling.load_pickle(PICKLE_FILENAME)
            current_time = time.time()
            # NOTE(review): each element of the loaded object is unpacked as an
            # (action, value) pair. If the pickle holds a dict, iterating it
            # yields keys only; confirm the format the save routine writes.
            restored = {}
            for action, value in stored:
                # The last tuple element is the expiry time; drop expired entries.
                if value[-1] > current_time:
                    restored[action] = value
            with self.posts_lock:
                self.posts = restored
            for action in restored:
                Tasks.do(self._subscribe, action)
            self._schedule_save()

        reader = threading.Thread(name="edit watcher", target=self._start, daemon=True)
        reader.start()
    def subscribe(self, post_url, callback=None, pickle=True, timeout=None):
        """Watch the question behind ``post_url`` and optionally register a callback.

        :param post_url: URL of the post; an answer is mapped to its question
                         through the stored post/site ID link
        :param callback: optional callable stored with the watched post
        :param pickle: when true, a save task is queued after a change
        :param timeout: optional number of seconds from now stored with the
                        callback as its time limit
        :return: None
        """
        post_id, post_site, post_type = fetch_post_id_and_site_from_url(post_url)

        if post_site not in GlobalVars.site_id_dict:
            log("warning", "unknown site {} when subscribing to {}".format(post_site, post_url))
            return

        question_id = post_id
        if post_type == "answer":
            question_id = datahandling.get_post_site_id_link((post_id, post_site, post_type))
            if question_id is None:
                # No known question for this answer: nothing to watch.
                return

        action = "{}-question-{}".format(GlobalVars.site_id_dict[post_site], question_id)
        deadline = (time.time() + timeout) if timeout else None

        if action in self.posts:
            if not callback:
                # Already watching and nothing new to register.
                return
            _, _, _, _, registered = self.posts[action]
            registered.append((callback, deadline))
        else:
            initial_callbacks = [(callback, deadline)] if callback else []
            self.posts[action] = (post_id, post_site, post_type, post_url, initial_callbacks)
            try:
                self.socket.send(action)
            except websocket.WebSocketException:
                log('error', 'DeletionWatcher failed on sending {}'.format(action))

        if pickle:
            Tasks.do(self._save)
    def subscribe(self, post_url, callback=None, pickle=True, timeout=None):
        """Watch the question behind ``post_url`` and optionally register a callback.

        :param post_url: URL of the post; an answer is mapped to its question
                         through the stored post/site ID link
        :param callback: optional callable stored with the watched post
        :param pickle: when true, a save task is queued after a change
        :param timeout: optional number of seconds from now stored with the
                        callback as its time limit
        :return: None
        """
        post_id, post_site, post_type = fetch_post_id_and_site_from_url(
            post_url)

        if post_site not in GlobalVars.site_id_dict:
            log(
                "warning", "unknown site {} when subscribing to {}".format(
                    post_site, post_url))
            return

        if post_type == "answer":
            question_id = datahandling.get_post_site_id_link(
                (post_id, post_site, post_type))

            if question_id is None:
                return
        else:
            question_id = post_id

        site_id = GlobalVars.site_id_dict[post_site]
        action = "{}-question-{}".format(site_id, question_id)
        max_time = (time.time() + timeout) if timeout else None

        if action not in self.posts:
            self.posts[action] = (post_id, post_site, post_type, post_url,
                                  [(callback, max_time)] if callback else [])
            self.socket.send(action)
        elif callback:
            _, _, _, _, callbacks = self.posts[action]
            callbacks.append((callback, max_time))
        else:
            # Already watching and no callback to add. Writing a fresh entry
            # here would discard the callbacks registered earlier and send
            # the subscription a second time, so leave everything as it is.
            return

        if pickle:
            Tasks.do(self._save)
    def subscribe(self, post_url=None, hostname=None, site_id=None, question_id=None,
                  pickle=True, timeout=DEFAULT_TIMEOUT, max_time=None, from_time=None):
        """Start (or extend) edit-watching for one or more questions.

        The question is identified either by ``post_url`` or by
        ``question_id`` together with ``hostname`` and/or ``site_id``; the
        missing one of hostname/site ID is looked up in ``GlobalVars``.

        :param post_url: URL of the post; only used when the explicit
                         identifiers are not all given
        :param hostname: hostname of the site
        :param site_id: ID of the site
        :param question_id: a single question ID or a list of them
        :param pickle: when true, a save is scheduled if anything changed
        :param timeout: seconds added to the start time to get the expiry
                        when ``max_time`` is not given
        :param max_time: explicit expiry time
        :param from_time: start time used instead of the current time
        :return: None
        """
        if GlobalVars.no_edit_watcher:
            return
        if post_url and not ((hostname or site_id) and question_id):
            post_id, hostname, post_type = fetch_post_id_and_site_from_url(post_url)
            if post_type == "answer":
                question_id = datahandling.get_post_site_id_link((post_id, hostname, post_type))
                if question_id is None:
                    log("warning", "Unable to get question ID when subscribing to: hostname: "
                                   "{} :: post ID:{} when subscribing to {}".format(hostname, post_id, post_url))
                    return
            else:
                question_id = post_id
            # NOTE(review): this also rejects answers whose question ID was
            # just resolved above; confirm that is intended.
            if post_type != "question":
                log("warning", "tried to edit-watch non-question: hostname: "
                               "{} :: post ID:{} when subscribing to {}".format(hostname, question_id, post_url))
                return
        if not site_id or not hostname:
            with GlobalVars.site_id_dict_lock:
                if not site_id and hostname:
                    site_id = GlobalVars.site_id_dict.get(hostname)
                if site_id and not hostname:
                    hostname = GlobalVars.site_id_dict_by_id.get(site_id)
        if not site_id or not hostname:
            log("warning", "unable to determine a valid site ID or hostname when subscribing to question ID "
                           "{}:: site_id:{}::  hostname:{}::  post_url:{}".format(question_id, site_id, hostname,
                                                                                  post_url))
            return

        question_ids = question_id if isinstance(question_id, list) else [question_id]
        now = from_time if from_time else time.time()
        if not max_time:
            max_time = now + timeout

        # Becomes True as soon as any entry is added or its expiry extended,
        # so the save below actually runs when there is something to save.
        updated = False
        to_subscribe = []
        with self.posts_lock:
            for question_id in question_ids:
                action = "{}-question-{}".format(site_id, question_id)
                if action not in self.posts:
                    self.posts[action] = (site_id, hostname, question_id, max_time)
                    to_subscribe.append(action)
                    updated = True
                else:
                    # The expiry is the last element of the stored tuple
                    # (index 2 is the question ID, not a time).
                    old_max_time = self.posts[action][3]
                    if max_time > old_max_time:
                        self.posts[action] = (site_id, hostname, question_id, max_time)
                        updated = True

        for action in to_subscribe:
            print('scheduling subscription to action:', action)
            Tasks.do(self._subscribe, action)

        if updated and pickle:
            self._schedule_save()
    def _start(self):
        """Receive websocket messages forever and handle "post-deleted" events.

        A message whose action is "hb" is answered with "hb". For any other
        message the JSON in its "data" field is decoded; when its "a" field
        is "post-deleted" and the action is a key of ``self.posts``, the
        watched post is treated as deleted if it is not an answer, or if the
        event's "aId" equals the watched answer's ID. In that case the entry
        is removed, "-" + action is sent on the socket, deletion stats are
        queued for the post URL, and every callback whose time limit is
        unset or not yet reached is called.
        """
        while True:
            msg = self.socket.recv()

            if msg:
                msg = json.loads(msg)
                action = msg["action"]

                if action == "hb":
                    self.socket.send("hb")
                else:
                    data = json.loads(msg["data"])

                    if data["a"] == "post-deleted":
                        try:
                            post_id, _, post_type, post_url, callbacks = self.posts[action]

                            if not post_type == "answer" or ("aId" in data and str(data["aId"]) == post_id):
                                # Forget the entry only once the watched post itself is gone.
                                # Removing it for any deletion event on the question would make
                                # the later event for the watched answer hit KeyError and be
                                # silently ignored.
                                del self.posts[action]
                                self.socket.send("-" + action)
                                Tasks.do(metasmoke.Metasmoke.send_deletion_stats_for_post, post_url, True)

                                for callback, max_time in callbacks:
                                    if not max_time or time.time() < max_time:
                                        callback()
                        except KeyError:
                            # The action is not in self.posts: nothing to do.
                            pass
def dispatch_reply_command(msg, reply, full_cmd, comment=True):
    """Run the reply command named by the first word of ``full_cmd``.

    ``full_cmd`` is split at its first space into a command word and an
    argument string. The command word is lower-cased, a trailing "-" marks
    the action as quiet, and trailing non-word characters are stripped
    before the lookup in ``_reply_commands``.

    :param msg: the message that was replied to; passed to the command as
                its first positional argument
    :param reply: the replying message; passed as ``original_msg``
    :param full_cmd: the command text
    :param comment: when true and the text is not a known command, the text
                    is submitted as an auto comment if the reply's owner is
                    privileged in the reply's room and report data exists
                    for ``msg``
    :return: the command function's return value, or None when no command
             was run
    """
    cmd, _, args = full_cmd.partition(" ")
    cmd = cmd.lower()

    # endswith() rather than cmd[-1]: the command word is empty when the text
    # is empty or begins with a space, and indexing it would raise IndexError.
    quiet_action = cmd.endswith("-")
    cmd = regex.sub(r"\W*$", "", cmd)

    if cmd in _reply_commands:
        func, (min_arity, max_arity) = _reply_commands[cmd]

        assert min_arity == 1

        if max_arity == 1:
            return func(msg, original_msg=reply, alias_used=cmd, quiet_action=quiet_action)
        elif max_arity == 2:
            # The whole argument string is handed over unsplit.
            return func(msg, args, original_msg=reply, alias_used=cmd, quiet_action=quiet_action)
        else:
            # NOTE(review): msg counts towards the arity in the two branches
            # above, yet here the argument list alone is padded to max_arity.
            # Confirm against the registered commands before relying on it.
            args = args.split()
            args.extend([None] * (max_arity - len(args)))

            return func(msg, *args, original_msg=reply, alias_used=cmd, quiet_action=quiet_action)
    elif comment and is_privileged(reply.owner, reply.room):
        post_data = get_report_data(msg)

        if post_data:
            Tasks.do(metasmoke.Metasmoke.post_auto_comment, full_cmd, reply.owner, url=post_data[0])
Beispiel #10
0
def handle_spam(post, reasons, why):
    """Record a detected post, report it to metasmoke and announce it in chat.

    :param post: the detected post object
    :param reasons: collection of reason strings the post was detected for
    :param why: explanation text for the detection; may be None or empty
    :return: None; any exception raised while reporting is passed to
             ``excepthook.uncaught_exception`` instead of propagating
    """
    datahandling.append_to_latest_questions(post.post_site, post.post_id, post.title if not post.is_answer else "")

    # A post detected for exactly one of these reasons is added to the auto-ignored posts.
    auto_ignored_reasons = (
        "all-caps title",
        "repeating characters in title",
        "repeating characters in body",
        "repeating characters in answer",
        "repeating words in title",
        "repeating words in body",
        "repeating words in answer",
    )
    if len(reasons) == 1 and any(reason in reasons for reason in auto_ignored_reasons):
        datahandling.add_auto_ignored_post((post.post_id, post.post_site, datetime.utcnow()))

    if why is not None and why != "":
        datahandling.add_why(post.post_site, post.post_id, why)

    if post.is_answer and post.post_id is not None and post.post_id != "":
        datahandling.add_post_site_id_link((post.post_id, post.post_site, "answer"), post.parent.post_id)

    try:
        post_url = parsing.to_protocol_relative(parsing.url_to_shortlink(post.post_url))
        poster_url = parsing.to_protocol_relative(parsing.user_url_to_shortlink(post.user_url))
        if not post.user_name.strip() or (not poster_url or poster_url.strip() == ""):
            username = ""
        else:
            username = post.user_name.strip()

        Tasks.do(metasmoke.Metasmoke.send_stats_on_post,
                 post.title_ignore_type, post_url, reasons, post.body, username,
                 post.user_link, why, post.owner_rep, post.post_score,
                 post.up_vote_count, post.down_vote_count)

        offensive_mask = 'offensive title detected' in reasons
        message = build_message(post, reasons)
        if offensive_mask:
            # Build a second message with the title masked; note that this
            # overwrites post.title on the passed-in object.
            post.title = "(potentially offensive title -- see MS for details)"
            clean_message = build_message(post, reasons)

        log('debug', GlobalVars.parser.unescape(message).encode('ascii', errors='replace'))
        GlobalVars.deletion_watcher.subscribe(post_url)

        without_roles = tuple(["no-" + reason for reason in reasons]) + ("site-no-" + post.post_site,)

        # `why` may be None (see the check above), so fall back to "" before
        # calling startswith() instead of raising AttributeError and losing
        # the chat report.
        if set(reasons) - GlobalVars.experimental_reasons == set() and \
                not (why or "").startswith("Post manually "):
            chatcommunicate.tell_rooms(message, ("experimental-all-sites", "experimental-site-" + post.post_site),
                                       without_roles, notify_site=post.post_site, report_data=(post_url, poster_url))
        else:
            if offensive_mask:
                chatcommunicate.tell_rooms(message, ("all-sites", "site-" + post.post_site),
                                           without_roles + ("offensive-mask",), notify_site=post.post_site,
                                           report_data=(post_url, poster_url))
                chatcommunicate.tell_rooms(clean_message, ("all-sites", "site-" + post.post_site),
                                           without_roles + ("no-offensive-mask",), notify_site=post.post_site,
                                           report_data=(post_url, poster_url))
            else:
                chatcommunicate.tell_rooms(message, ("all-sites", "site-" + post.post_site),
                                           without_roles, notify_site=post.post_site,
                                           report_data=(post_url, poster_url))
    except Exception:
        # Deliberate catch-all: reporting failures are handed to the exception hook.
        excepthook.uncaught_exception(*sys.exc_info())
Beispiel #11
0
def on_msg(msg, client):
    """Handle a chat event: run commands, record feedback, or relay the text.

    Only ``MessagePosted`` and ``MessageEdited`` events are processed.
    Messages owned by the client's own user are only relayed to
    ``SocketScience`` (when the room has the 'direct' role) and otherwise
    ignored.

    :param msg: the chat event
    :param client: the chat client the event arrived on
    :return: None
    """
    global _room_roles

    if not isinstance(msg, (events.MessagePosted, events.MessageEdited)):
        return

    message = msg.message
    room_ident = (client.host, message.room.id)

    def relay_if_direct_room():
        # Forward the source text, minus zero-width characters, for 'direct' rooms.
        if 'direct' in _room_roles and room_ident in _room_roles['direct']:
            SocketScience.receive(message.content_source.replace("\u200B", "").replace("\u200C", ""))

    with _room_roles_lock:
        if message.owner.id == client._br.user_id:
            relay_if_direct_room()
            return

    # Strip the wrapper of a partial message.
    if message.content.startswith("<div class='partial'>"):
        message.content = message.content[21:]
        if message.content.endswith("</div>"):
            message.content = message.content[:-6]

    if message.parent:
        try:
            if message.parent.owner.id == client._br.user_id:
                # A reply to one of our own messages: drop the leading mention.
                strip_mention = regex.sub("^(<span class=(\"|')mention(\"|')>)?@.*?(</span>)? ", "",
                                          message.content)
                cmd = GlobalVars.parser.unescape(strip_mention)

                result = dispatch_reply_command(message.parent, message, cmd)
                send_reply_if_not_blank(room_ident, message.id, result)
        except ValueError:
            pass
        return

    if message.content.lower().startswith("sd "):
        result = dispatch_shorthand_command(message)
        send_reply_if_not_blank(room_ident, message.id, result)
        return

    if message.content.startswith("!!/") or message.content.lower().startswith("sdc "):
        result = dispatch_command(message)
        send_reply_if_not_blank(room_ident, message.id, result)
        return

    if classes.feedback.FEEDBACK_REGEX.search(message.content) \
            and is_privileged(message.owner, message.room) and datahandling.last_feedbacked:
        ids, expires_in = datahandling.last_feedbacked

        if time.time() < expires_in:
            Tasks.do(metasmoke.Metasmoke.post_auto_comment, message.content_source, message.owner, ids=ids)
        return

    with _room_roles_lock:
        relay_if_direct_room()
def on_msg(msg, client):
    """Handle a chat event: run commands, record feedback, or relay the text.

    Only ``MessagePosted`` and ``MessageEdited`` events are processed.
    Command results that are not blank are queued as replies to the
    triggering message.

    :param msg: the chat event
    :param client: the chat client the event arrived on
    :return: None
    """
    global _room_roles

    if not isinstance(msg, (events.MessagePosted, events.MessageEdited)):
        return

    message = msg.message
    room_ident = (client.host, message.room.id)
    room_data = _rooms[room_ident]

    def relay_if_direct_room():
        # Forward the source text, minus zero-width characters, for 'direct' rooms.
        if 'direct' in _room_roles and room_ident in _room_roles['direct']:
            SocketScience.receive(message.content_source.replace("\u200B", "").replace("\u200C", ""))

    def queue_reply(result):
        # A single-line result of 488+ characters goes on its own line after the reply prefix.
        if result:
            template = ":{}\n{}" if "\n" not in result and len(result) >= 488 else ":{} {}"
            _msg_queue.put((room_data, template.format(message.id, result), None))

    if message.owner.id == client._br.user_id:
        relay_if_direct_room()
        return

    # Strip the wrapper of a partial message.
    if message.content.startswith("<div class='partial'>"):
        message.content = message.content[21:]
        if message.content.endswith("</div>"):
            message.content = message.content[:-6]

    if message.parent:
        try:
            if message.parent.owner.id == client._br.user_id:
                # A reply to one of our own messages: drop the leading mention.
                strip_mention = regex.sub("^(<span class=(\"|')mention(\"|')>)?@.*?(</span>)? ", "", message.content)
                cmd = GlobalVars.parser.unescape(strip_mention)

                queue_reply(dispatch_reply_command(message.parent, message, cmd))
        except ValueError:
            pass
        return

    if message.content.lower().startswith("sd "):
        queue_reply(dispatch_shorthand_command(message))
        return

    if message.content.startswith("!!/"):
        queue_reply(dispatch_command(message))
        return

    if classes.feedback.FEEDBACK_REGEX.search(message.content) \
            and is_privileged(message.owner, message.room) and datahandling.last_feedbacked:
        ids, expires_in = datahandling.last_feedbacked

        if time.time() < expires_in:
            Tasks.do(metasmoke.Metasmoke.post_auto_comment, message.content_source, message.owner, ids=ids)
        return

    relay_if_direct_room()
Beispiel #13
0
def on_msg(msg, client):
    """Handle a chat event: run commands or record feedback.

    Only ``MessagePosted`` and ``MessageEdited`` events from other users
    are processed. Command results that are not blank are queued as
    replies to the triggering message.

    :param msg: the chat event
    :param client: the chat client the event arrived on
    :return: None
    """
    if not isinstance(msg, (events.MessagePosted, events.MessageEdited)):
        return

    message = msg.message
    if message.owner.id == client._br.user_id:
        return

    # Strip the wrapper of a partial message.
    if message.content.startswith("<div class='partial'>"):
        message.content = message.content[21:]
        if message.content.endswith("</div>"):
            message.content = message.content[:-6]

    room_data = _rooms[(client.host, message.room.id)]

    def queue_reply(result):
        # Blank results produce no reply.
        if result:
            _msg_queue.put((room_data, ":{} {}".format(message.id, result), None))

    if message.parent:
        try:
            if message.parent.owner.id == client._br.user_id:
                # A reply to one of our own messages: drop the leading mention.
                strip_mention = regex.sub("^(<span class=(\"|')mention(\"|')>)?@.*?(</span>)? ", "",
                                          message.content)
                cmd = GlobalVars.parser.unescape(strip_mention)

                queue_reply(dispatch_reply_command(message.parent, message, cmd))
        except ValueError:
            pass
        return

    if message.content.lower().startswith("sd "):
        queue_reply(dispatch_shorthand_command(message))
        return

    if message.content.startswith("!!/"):
        queue_reply(dispatch_command(message))
        return

    if classes.feedback.FEEDBACK_REGEX.search(message.content) \
            and is_privileged(message.owner, message.room) and datahandling.last_feedbacked:
        ids, expires_in = datahandling.last_feedbacked

        if time.time() < expires_in:
            Tasks.do(metasmoke.Metasmoke.post_auto_comment, message.content_source, message.owner, ids=ids)
def sum_weight(reasons: list):
    """Return the sum of the stored weights of ``reasons``.

    The weights are loaded first when none are stored. A refresh task is
    queued when the date of the UTC time 15 minutes ago differs from the
    stored 'last_updated' value and that shifted time's hour is at least 1.

    :param reasons: reason strings; anything from the first "(" onwards
                    (with the whitespace before it) is ignored, and the
                    lookup is case-insensitive
    :return: the summed weight; reasons without a stored weight add nothing
    """
    if not GlobalVars.reason_weights:
        datahandling.update_reason_weights()

    shifted = datetime.utcnow() - timedelta(minutes=15)
    is_other_day = shifted.date() != GlobalVars.reason_weights['last_updated']
    if is_other_day and shifted.hour >= 1:
        Tasks.do(datahandling.update_reason_weights)

    weights = GlobalVars.reason_weights
    total = 0
    for reason in reasons:
        # Drop a parenthesised suffix before looking the reason up.
        name = regex.sub(r"\s*\(.*$", "", reason) if "(" in reason else reason
        try:
            total += weights[name.lower()]
        except KeyError:
            continue  # unknown reason: contributes 0
    return total
def sum_weight(reasons: list):
    """Return the sum of the stored weights of ``reasons``.

    The weights are loaded first when none are stored. A refresh task is
    queued when the date of the UTC time 15 minutes ago differs from the
    stored 'last_updated' value and that shifted time's hour is at least 1.

    :param reasons: reason strings; anything from the first "(" onwards
                    (with the whitespace before it) is ignored, and the
                    lookup is case-insensitive
    :return: the summed weight; reasons without a stored weight add nothing
    """
    if not GlobalVars.reason_weights:
        datahandling.update_reason_weights()

    shifted = datetime.utcnow() - timedelta(minutes=15)
    is_other_day = shifted.date() != GlobalVars.reason_weights['last_updated']
    if is_other_day and shifted.hour >= 1:
        Tasks.do(datahandling.update_reason_weights)

    weights = GlobalVars.reason_weights
    total = 0
    for reason in reasons:
        # Drop a parenthesised suffix before looking the reason up.
        name = regex.sub(r"\s*\(.*$", "", reason) if "(" in reason else reason
        try:
            total += weights[name.lower()]
        except KeyError:
            continue  # unknown reason: contributes 0
    return total
Beispiel #16
0
def schedule_store_recently_scanned_posts():
    """Queue ``store_recently_scanned_posts``, replacing any earlier handle.

    Under the handle lock, the stored handle (if any) is cancelled and the
    handle returned by ``Tasks.do`` for the new task is stored in its place.
    """
    global recently_scanned_posts_save_handle
    with recently_scanned_posts_save_handle_lock:
        previous = recently_scanned_posts_save_handle
        if previous:
            previous.cancel()
        recently_scanned_posts_save_handle = Tasks.do(store_recently_scanned_posts)
    def subscribe(self, post_url, callback=None, pickle=True, timeout=None):
        """Watch the question behind ``post_url`` and optionally register a callback.

        Does nothing when ``GlobalVars.no_deletion_watcher`` is set.

        :param post_url: URL of the post; an answer is mapped to its question
                         through the stored post/site ID link
        :param callback: optional callable stored with the watched post
        :param pickle: when true, a save task is queued after a change
        :param timeout: optional number of seconds from now stored with the
                        callback as its time limit
        :return: None
        """
        if GlobalVars.no_deletion_watcher:
            return

        post_id, post_site, post_type = fetch_post_id_and_site_from_url(post_url)

        if post_site not in GlobalVars.site_id_dict:
            log("warning", "unknown site {} when subscribing to {}".format(post_site, post_url))
            return

        question_id = post_id
        if post_type == "answer":
            question_id = datahandling.get_post_site_id_link((post_id, post_site, post_type))
            if question_id is None:
                # No known question for this answer: nothing to watch.
                return

        action = "{}-question-{}".format(GlobalVars.site_id_dict[post_site], question_id)
        deadline = (time.time() + timeout) if timeout else None

        if action in self.posts:
            if not callback:
                # Already watching and nothing new to register.
                return
            _, _, _, _, registered = self.posts[action]
            registered.append((callback, deadline))
        else:
            initial_callbacks = [(callback, deadline)] if callback else []
            self.posts[action] = (post_id, post_site, post_type, post_url, initial_callbacks)
            try:
                self.socket.send(action)
            except websocket.WebSocketException:
                log('error', 'DeletionWatcher failed on sending {}'.format(action))

        if pickle:
            Tasks.do(self._save)
    def check_queue(self):
        """Make an API call for every site whose queue qualifies for processing.

        A site qualifies when it has a special-case limit and its queue has
        reached it, when it is time sensitive, non-empty and the current UTC
        hour is between 4 and 11 inclusive, or when it has no special case
        and its queue has reached ``self.threshold``. When no site
        qualifies, a task storing the queue is scheduled instead.
        """
        # Intended to be called once in a new thread whenever an entry is added to the queue,
        # so handling a single entry would normally be enough. Checking every site also covers
        # things going wrong, or other ways of qualifying than queue depth (e.g. time in queue).

        # Work on a snapshot of the queue lengths so other threads may keep changing the queue.
        # The original comments state this is OK because self.make_api_call_for_site(site)
        # verifies that the site is still in the queue.
        current_hour = datetime.utcnow().hour
        is_time_sensitive_time = 4 <= current_hour < 12
        with self.queue_lock:
            queue_lengths = {site: len(values) for site, values in self.queue.items()}

        # First pass: sites with a reached special-case limit, and time-sensitive sites.
        sites_to_handle = []
        for site, length in queue_lengths.items():
            reached_special_limit = site in self.special_cases and length >= self.special_cases[site]
            if reached_special_limit or (is_time_sensitive_time and site in self.time_sensitive and length >= 1):
                sites_to_handle.append(site)

        # Second pass: every remaining site without a special case whose queue reached the threshold.
        already_chosen = set(sites_to_handle)
        sites_to_handle.extend(
            site for site, length in queue_lengths.items()
            if site not in already_chosen and site not in self.special_cases and length >= self.threshold
        )

        for site in sites_to_handle:
            self.make_api_call_for_site(site)

        if not sites_to_handle:
            # We're not making an API request, so explicitly store the queue.
            Tasks.do(store_bodyfetcher_queue)
    def subscribe(self, post_url, callback=None, pickle=True, timeout=None):
        """Watch the question behind ``post_url`` and optionally register a callback.

        Does nothing when ``GlobalVars.no_deletion_watcher`` is set.

        :param post_url: URL of the post; an answer is mapped to its question
                         through the stored post/site ID link
        :param callback: optional callable stored with the watched post
        :param pickle: when true, a save is scheduled after a change
        :param timeout: optional number of seconds from now stored with the
                        callback as its time limit
        :return: None
        """
        if GlobalVars.no_deletion_watcher:
            return

        post_id, post_site, post_type = fetch_post_id_and_site_from_url(post_url)

        with GlobalVars.site_id_dict_lock:
            site_id = GlobalVars.site_id_dict.get(post_site)
        if not site_id:
            log("warning", "unknown site {} when subscribing to {}".format(post_site, post_url))
            return

        question_id = post_id
        if post_type == "answer":
            question_id = datahandling.get_post_site_id_link((post_id, post_site, post_type))
            if question_id is None:
                # No known question for this answer: nothing to watch.
                return

        action = "{}-question-{}".format(site_id, question_id)
        deadline = (time.time() + timeout) if timeout else None

        with self.posts_lock:
            already_watched = action in self.posts
            if already_watched and not callback:
                # Already watching and nothing new to register.
                return
            if already_watched:
                _, _, _, _, registered = self.posts[action]
                registered.append((callback, deadline))
            else:
                initial_callbacks = [(callback, deadline)] if callback else []
                self.posts[action] = (post_id, post_site, post_type, post_url, initial_callbacks)
                Tasks.do(self._subscribe, action)

        if pickle:
            self._schedule_save()
def on_msg(msg, client):
    """Handle a chat event and dispatch any command it contains.

    Only `events.MessagePosted` and `events.MessageEdited` events are
    processed, and messages written by the client's own user are ignored.
    Depending on the message, it is routed to `dispatch_reply_command`,
    `dispatch_shorthand_command` or `dispatch_command`; a truthy result is
    queued on `_msg_queue` as a reply to the message. A message matching
    `classes.feedback.FEEDBACK_REGEX` from a privileged user is forwarded to
    metasmoke as an auto comment while `datahandling.last_feedbacked` has not
    expired.
    """
    if not isinstance(msg, (events.MessagePosted, events.MessageEdited)):
        return

    message = msg.message
    own_user_id = client._br.user_id
    if message.owner.id == own_user_id:
        return

    # Unwrap the "partial" <div> wrapper, when present, before parsing.
    partial_open = "<div class='partial'>"
    partial_close = "</div>"
    if message.content.startswith(partial_open):
        message.content = message.content[len(partial_open):]
        if message.content.endswith(partial_close):
            message.content = message.content[:-len(partial_close)]

    room_data = _rooms[(client.host, message.room.id)]

    def queue_reply(result):
        # Only non-empty command results are sent back to the room.
        if result:
            _msg_queue.put((room_data, ":{} {}".format(message.id, result), None))

    if message.parent:
        # Replies are only interpreted when they answer one of our own messages.
        if message.parent.owner.id == client._br.user_id:
            without_mention = regex.sub("^(<span class=(\"|')mention(\"|')>)?@.*?(</span>)? ", "", message.content)
            reply_cmd = GlobalVars.parser.unescape(without_mention)
            queue_reply(dispatch_reply_command(message.parent, message, reply_cmd))
        return

    if message.content.lower().startswith("sd "):
        queue_reply(dispatch_shorthand_command(message))
        return

    if message.content.startswith("!!/"):
        queue_reply(dispatch_command(message))
        return

    is_feedback = (classes.feedback.FEEDBACK_REGEX.search(message.content)
                   and is_privileged(message.owner, message.room)
                   and datahandling.last_feedbacked)
    if is_feedback:
        ids, expires_in = datahandling.last_feedbacked
        if time.time() < expires_in:
            Tasks.do(metasmoke.Metasmoke.post_auto_comment, message.content_source, message.owner, ids=ids)
Beispiel #21
0
    def _start(self):
        """Receive socket frames forever and queue edited posts for scanning.

        Each non-empty frame is decoded as JSON. An "hb" action is echoed
        back on the socket (presumably a heartbeat). For any other action the
        matching entry in `self.posts` is looked up: an entry whose
        `max_time` has passed is removed and unsubscribed, while a
        "post-edit" event for a still-valid entry is handed to
        `add_to_global_bodyfetcher_queue_in_new_thread`.
        """
        while True:
            raw = self.socket.recv()
            if not raw:
                continue

            frame = json.loads(raw)
            action = frame["action"]
            if action == "hb":
                self.socket.send("hb")
                continue

            data = json.loads(frame["data"])
            now = time.time()
            # Unknown actions get max_time == now, so they can never pass the
            # "max_time > now" test below.
            unknown = (None, None, None, now)
            with self.posts_lock:
                site_id, hostname, question_id, max_time = self.posts.get(action, unknown)
                if site_id and max_time <= now:
                    del self.posts[action]
                    Tasks.do(self._unsubscribe, action)

            still_watched = max_time > now
            if still_watched and data["a"] == "post-edit":
                add_to_global_bodyfetcher_queue_in_new_thread(
                    hostname, question_id, False, source="EditWatcher")
Beispiel #22
0
    def check_websocket_for_deletion(self, post_site_id, post_url, timeout):
        """Watch the Stack Exchange websocket for deletion of a post.

        :param post_site_id: sequence of (post id, site, post type), where the
            post type is compared against "answer" and "question".
        :param post_url: URL reported to metasmoke with the outcome.
        :param timeout: number of seconds to keep listening.
        :return: True when a matching "post-deleted" event arrives in time,
            False when the timeout elapses first, and None when the question
            id of an answer cannot be looked up or the site is not in
            `GlobalVars.site_id_dict`.
        """
        time_to_check = time.time() + timeout
        post_id = post_site_id[0]
        post_type = post_site_id[2]
        if post_type == "answer":
            question_id = datahandling.get_post_site_id_link(post_site_id)
            # Test for None *before* converting to str; str(None) is the
            # non-None string "None" and would defeat this guard.
            if question_id is None:
                return
            question_id = str(question_id)
        else:
            question_id = post_id
        post_site = post_site_id[1]
        if post_site not in GlobalVars.site_id_dict:
            return
        site_id = GlobalVars.site_id_dict[post_site]

        ws = websocket.create_connection("wss://qa.sockets.stackexchange.com/")
        try:
            ws.send(site_id + "-question-" + question_id)

            while time.time() < time_to_check:
                ws.settimeout(time_to_check - time.time())
                try:
                    a = ws.recv()
                except websocket.WebSocketTimeoutException:
                    # Timed out: report "not deleted" below.
                    break
                if a is None or a == "":
                    continue
                try:
                    frame = json.loads(a)
                    action = frame["action"]
                    if action == "hb":
                        ws.send("hb")
                        continue
                    d = json.loads(frame["data"])
                except Exception:
                    # Frames that cannot be handled are skipped on purpose.
                    continue
                if d["a"] == "post-deleted" and str(d["qId"]) == question_id:
                    if (post_type == "answer" and "aId" in d and str(d["aId"])
                            == post_id) or post_type == "question":
                        Tasks.do(
                            metasmoke.Metasmoke.send_deletion_stats_for_post,
                            post_url, True)
                        return True
        finally:
            # Always release the connection, whichever way the loop is left.
            ws.close()

        Tasks.do(metasmoke.Metasmoke.send_deletion_stats_for_post, post_url,
                 False)
        return False
    def check_websocket_for_deletion(self, post_site_id, post_url, timeout):
        """Listen on the Stack Exchange websocket until a post is deleted.

        :param post_site_id: sequence of (post id, site, post type), where the
            post type is compared against "answer" and "question".
        :param post_url: URL reported to metasmoke with the outcome.
        :param timeout: number of seconds to keep listening.
        :return: True when a matching "post-deleted" event arrives in time,
            False when the timeout elapses first, and None when the question
            id of an answer cannot be looked up or the site is not in
            `GlobalVars.site_id_dict`.
        """
        time_to_check = time.time() + timeout
        post_id = post_site_id[0]
        post_type = post_site_id[2]
        if post_type == "answer":
            question_id = datahandling.get_post_site_id_link(post_site_id)
            # The None test has to precede str(): str(None) == "None" is never None.
            if question_id is None:
                return
            question_id = str(question_id)
        else:
            question_id = post_id
        post_site = post_site_id[1]
        if post_site not in GlobalVars.site_id_dict:
            return
        site_id = GlobalVars.site_id_dict[post_site]

        ws = websocket.create_connection("wss://qa.sockets.stackexchange.com/")
        try:
            ws.send(site_id + "-question-" + question_id)

            while time.time() < time_to_check:
                ws.settimeout(time_to_check - time.time())
                try:
                    a = ws.recv()
                except websocket.WebSocketTimeoutException:
                    break  # Timed out: fall through to the "not deleted" report.
                if a is None or a == "":
                    continue
                try:
                    frame = json.loads(a)
                    action = frame["action"]
                    if action == "hb":
                        ws.send("hb")
                        continue
                    d = json.loads(frame["data"])
                except Exception:
                    continue  # Frames that cannot be handled are skipped on purpose.
                if d["a"] == "post-deleted" and str(d["qId"]) == question_id:
                    if (post_type == "answer" and "aId" in d and str(d["aId"]) == post_id) or post_type == "question":
                        Tasks.do(metasmoke.Metasmoke.send_deletion_stats_for_post, post_url, True)
                        return True
        finally:
            ws.close()  # Release the connection on every exit path.

        Tasks.do(metasmoke.Metasmoke.send_deletion_stats_for_post, post_url, False)
        return False
Beispiel #24
0
 def send_custom(type, url, msg):
     """Hand `Metasmoke.send_feedback_for_post` to `Tasks.do` for `url`.

     The feedback `type` is passed along with the name and id of the
     message owner and the host of the message's client.
     """
     owner = msg.owner
     chat_host = msg._client.host
     Tasks.do(Metasmoke.send_feedback_for_post, url, type, owner.name,
              owner.id, chat_host)
Beispiel #25
0
 def _schedule_save(self):
     """Schedule `self._save` through `Tasks.do`, replacing any pending save.

     While holding `self.save_handle_lock`, a previously stored handle is
     cancelled before the new one is stored.
     """
     with self.save_handle_lock:
         if self.save_handle:
             self.save_handle.cancel()
         # Store the handle on the instance: binding it to a local name
         # would leave self.save_handle unchanged, so the next call could
         # never cancel this save.
         self.save_handle = Tasks.do(self._save)
Beispiel #26
0
 def send_custom(type, url, msg):
     """Hand `metasmoke.Metasmoke.send_feedback_for_post` to `Tasks.do`.

     The call receives `url`, the feedback `type`, the name and id of the
     message owner, and the host of the message's client, in that order.
     """
     feedback_args = (url, type, msg.owner.name, msg.owner.id, msg._client.host)
     Tasks.do(metasmoke.Metasmoke.send_feedback_for_post, *feedback_args)
def handle_spam(post, reasons, why):
    """Record a detected post and report it to metasmoke and the chat rooms.

    The post is added to the latest questions, auto-ignored when its only
    reason is one of the all-caps/repeating-text reasons, and its `why` text
    and (for answers) its link to the parent question are stored. A chat
    message of at most 500 characters is then built and sent with
    `chatcommunicate.tell_rooms`. Exceptions raised while building or sending
    the report are passed to `excepthook.uncaught_exception`, not propagated.

    :param post: the detected post object.
    :param reasons: sequence of reason strings the post was caught for.
    :param why: explanatory text for the detection; may be None or empty.
    """
    post_url = parsing.to_protocol_relative(parsing.url_to_shortlink(post.post_url))
    poster_url = parsing.to_protocol_relative(parsing.user_url_to_shortlink(post.user_url))
    shortened_site = post.post_site.replace("stackexchange.com", "SE")  # site.stackexchange.com -> site.SE
    datahandling.append_to_latest_questions(post.post_site, post.post_id, post.title if not post.is_answer else "")

    # A post caught for exactly one of these reasons is added to the auto-ignored posts.
    auto_ignore_reasons = (
        "all-caps title",
        "repeating characters in title",
        "repeating characters in body",
        "repeating characters in answer",
        "repeating words in title",
        "repeating words in body",
        "repeating words in answer",
    )
    if len(reasons) == 1 and any(r in reasons for r in auto_ignore_reasons):
        datahandling.add_auto_ignored_post((post.post_id, post.post_site, datetime.now()))
    if why is not None and why != "":
        datahandling.add_why(post.post_site, post.post_id, why)
    # Compare by value: identity against a string literal depends on interning.
    if post.is_answer and post.post_id is not None and post.post_id != "":
        datahandling.add_post_site_id_link((post.post_id, post.post_site, "answer"), post.parent.post_id)
    if GlobalVars.reason_weights or GlobalVars.metasmoke_key:
        reason_weight = sum_weight(reasons)
        if reason_weight >= 1000:
            reason_weight_s = " (**{}**)".format(reason_weight)
        else:
            reason_weight_s = " ({})".format(reason_weight)
    else:  # No reason weight if neither cache nor MS
        reason_weight_s = ""
    try:
        # If the post is an answer type post, the 'title' is going to be blank, so when posting the
        # message contents we need to set the post title to the *parent* title, so the message in the
        # chat is properly constructed with parent title instead. This will make things 'print'
        # in a proper way in chat messages.
        sanitized_title = parsing.sanitize_title(post.title if not post.is_answer else post.parent.title)
        sanitized_title = escape_format(sanitized_title).strip()

        prefix = u"[ [SmokeDetector](//git.io/vyDZv) ]"
        if GlobalVars.metasmoke_key:
            prefix_ms = u"[ [SmokeDetector](//git.io/vyDZv) | [MS]({}) ]".format(
                to_metasmoke_link(post_url, protocol=False))
        else:
            prefix_ms = prefix

        # We'll insert reason list later
        edited = '' if not post.edited else ' \u270F\uFE0F'
        if not post.user_name.strip() or (not poster_url or poster_url.strip() == ""):
            s = " {{}}{}: [{}]({}){} by a deleted user on `{}`".format(
                reason_weight_s, sanitized_title, post_url, edited, shortened_site)
            username = ""
        else:
            username = post.user_name.strip()
            escaped_username = escape_format(parsing.escape_markdown(username))
            s = " {{}}{}: [{}]({}){} by [{}]({}) on `{}`".format(
                reason_weight_s, sanitized_title, post_url, edited, escaped_username, poster_url, shortened_site)

        Tasks.do(metasmoke.Metasmoke.send_stats_on_post,
                 post.title_ignore_type, post_url, reasons, post.body, username,
                 post.user_link, why, post.owner_rep, post.post_score,
                 post.up_vote_count, post.down_vote_count)

        log('debug', GlobalVars.parser.unescape(s).encode('ascii', errors='replace'))
        GlobalVars.deletion_watcher.subscribe(post_url)

        reason = message = None
        for reason_count in range(5, 0, -1):  # Try 5 reasons and all the way down to 1
            reason = ", ".join(reasons[:reason_count])
            if len(reasons) > reason_count:
                reason += ", +{} more".format(len(reasons) - reason_count)
            reason = reason.capitalize()
            message = prefix_ms + s.format(reason)  # Insert reason list
            if len(message) <= 500:
                break  # Problem solved, stop attempting

        s = s.format(reason)  # Later code needs this variable
        if len(message) > 500:
            message = (prefix_ms + s)[:500]  # Truncate directly and keep MS link

        without_roles = tuple("no-" + r for r in reasons) + ("site-no-" + post.post_site,)

        # `why` may be None (see the check above); treat that like an empty string
        # so the report is still posted instead of dying with an AttributeError.
        manually_reported = (why or "").startswith("Post manually ")
        if set(reasons) - GlobalVars.experimental_reasons == set() and not manually_reported:
            chatcommunicate.tell_rooms(message, ("experimental",),
                                       without_roles, notify_site=post.post_site, report_data=(post_url, poster_url))
        else:
            chatcommunicate.tell_rooms(message, ("all", "site-" + post.post_site),
                                       without_roles, notify_site=post.post_site, report_data=(post_url, poster_url))
    except Exception:
        excepthook.uncaught_exception(*sys.exc_info())
Beispiel #28
0
def handle_spam(post, reasons, why):
    """Record a detected post and report it to metasmoke and the chat rooms.

    The post is added to the latest questions, auto-ignored when its only
    reason is one of the all-caps/repeating-text reasons, and its `why` text
    and (for answers) its link to the parent question are stored. A chat
    message of at most 500 characters is then built and sent with
    `chatcommunicate.tell_rooms`. Exceptions raised while building or sending
    the report are passed to `excepthook.uncaught_exception`, not propagated.

    :param post: the detected post object.
    :param reasons: sequence of reason strings the post was caught for.
    :param why: explanatory text for the detection; may be None or empty.
    """
    post_url = parsing.to_protocol_relative(
        parsing.url_to_shortlink(post.post_url))
    poster_url = parsing.to_protocol_relative(
        parsing.user_url_to_shortlink(post.user_url))
    shortened_site = post.post_site.replace(
        "stackexchange.com", "SE")  # site.stackexchange.com -> site.SE
    datahandling.append_to_latest_questions(
        post.post_site, post.post_id, post.title if not post.is_answer else "")

    # A post caught for exactly one of these reasons is added to the
    # auto-ignored posts.
    auto_ignore_reasons = (
        "all-caps title",
        "repeating characters in title",
        "repeating characters in body",
        "repeating characters in answer",
        "repeating words in title",
        "repeating words in body",
        "repeating words in answer",
    )
    if len(reasons) == 1 and any(r in reasons for r in auto_ignore_reasons):
        datahandling.add_auto_ignored_post(
            (post.post_id, post.post_site, datetime.now()))
    if why is not None and why != "":
        datahandling.add_why(post.post_site, post.post_id, why)
    # Compare by value: identity against a string literal depends on interning.
    if post.is_answer and post.post_id is not None and post.post_id != "":
        datahandling.add_post_site_id_link(
            (post.post_id, post.post_site, "answer"), post.parent.post_id)
    if GlobalVars.reason_weights or GlobalVars.metasmoke_key:
        reason_weight = sum_weight(reasons)
        if reason_weight >= 1000:
            reason_weight_s = " (**{:,}**)".format(reason_weight)
        else:
            reason_weight_s = " ({:,})".format(reason_weight)
    else:  # No reason weight if neither cache nor MS
        reason_weight_s = ""
    try:
        # If the post is an answer type post, the 'title' is going to be blank, so when posting the
        # message contents we need to set the post title to the *parent* title, so the message in the
        # chat is properly constructed with parent title instead. This will make things 'print'
        # in a proper way in chat messages.
        sanitized_title = parsing.sanitize_title(
            post.title if not post.is_answer else post.parent.title)
        sanitized_title = escape_format(sanitized_title).strip()

        prefix = u"[ [SmokeDetector](//goo.gl/eLDYqh) ]"
        if GlobalVars.metasmoke_key:
            prefix_ms = u"[ [SmokeDetector](//goo.gl/eLDYqh) | [MS]({}) ]".format(
                to_metasmoke_link(post_url, protocol=False))
        else:
            prefix_ms = prefix

        # We'll insert reason list later
        edited = '' if not post.edited else ' \u270F\uFE0F'
        if not post.user_name.strip() or (not poster_url
                                          or poster_url.strip() == ""):
            s = " {{}}{}: [{}]({}){} by a deleted user on `{}`".format(
                reason_weight_s, sanitized_title, post_url, edited,
                shortened_site)
            username = ""
        else:
            username = post.user_name.strip()
            escaped_username = escape_format(parsing.escape_markdown(username))
            s = " {{}}{}: [{}]({}){} by [{}]({}) on `{}`".format(
                reason_weight_s, sanitized_title, post_url, edited,
                escaped_username, poster_url, shortened_site)

        Tasks.do(metasmoke.Metasmoke.send_stats_on_post,
                 post.title_ignore_type, post_url, reasons, post.body,
                 username, post.user_link, why, post.owner_rep,
                 post.post_score, post.up_vote_count, post.down_vote_count)

        log('debug',
            GlobalVars.parser.unescape(s).encode('ascii', errors='replace'))
        GlobalVars.deletion_watcher.subscribe(post_url)

        reason = message = None
        for reason_count in range(
                5, 0, -1):  # Try 5 reasons and all the way down to 1
            reason = ", ".join(reasons[:reason_count])
            if len(reasons) > reason_count:
                reason += ", +{} more".format(len(reasons) - reason_count)
            reason = reason.capitalize()
            message = prefix_ms + s.format(reason)  # Insert reason list
            if len(message) <= 500:
                break  # Problem solved, stop attempting

        s = s.format(reason)  # Later code needs this variable
        if len(message) > 500:
            message = (prefix_ms +
                       s)[:500]  # Truncate directly and keep MS link

        without_roles = tuple("no-" + r
                              for r in reasons) + ("site-no-" + post.post_site, )

        # `why` may be None (see the check above); treat that like an empty
        # string so the report is still posted instead of dying with an
        # AttributeError.
        manually_reported = (why or "").startswith("Post manually ")
        if set(reasons) - GlobalVars.experimental_reasons == set() and \
                not manually_reported:
            chatcommunicate.tell_rooms(message, ("experimental", ),
                                       without_roles,
                                       notify_site=post.post_site,
                                       report_data=(post_url, poster_url))
        else:
            chatcommunicate.tell_rooms(message,
                                       ("all", "site-" + post.post_site),
                                       without_roles,
                                       notify_site=post.post_site,
                                       report_data=(post_url, poster_url))
    except Exception:
        excepthook.uncaught_exception(*sys.exc_info())
Beispiel #29
0
def stopflagging():
    """Hand `Metasmoke.stop_autoflagging` to `Tasks.do` and acknowledge it.

    :return: the fixed confirmation text "Request sent...".
    """
    Tasks.do(Metasmoke.stop_autoflagging)
    acknowledgement = "Request sent..."
    return acknowledgement
Beispiel #30
0
def handle_spam(post, reasons, why):
    """Record a detected post and report it to metasmoke and the chat rooms.

    The post is added to the latest questions, auto-ignored when its only
    reason is one of the all-caps/repeating-text reasons, and its `why` text
    and (for answers) its link to the parent question are stored. A chat
    message of at most 500 characters is then built and sent with
    `chatcommunicate.tell_rooms`. Exceptions raised while building or sending
    the report are passed to `excepthook.uncaught_exception`, not propagated.

    :param post: the detected post object.
    :param reasons: sequence of reason strings the post was caught for.
    :param why: explanatory text for the detection; may be None or empty.
    """
    post_url = parsing.to_protocol_relative(
        parsing.url_to_shortlink(post.post_url))
    poster_url = parsing.to_protocol_relative(
        parsing.user_url_to_shortlink(post.user_url))
    shortened_site = post.post_site.replace(
        "stackexchange.com", "SE")  # site.stackexchange.com -> site.SE
    datahandling.append_to_latest_questions(
        post.post_site, post.post_id, post.title if not post.is_answer else "")

    # A post caught for exactly one of these reasons is added to the
    # auto-ignored posts.
    auto_ignore_reasons = (
        "all-caps title",
        "repeating characters in title",
        "repeating characters in body",
        "repeating characters in answer",
        "repeating words in title",
        "repeating words in body",
        "repeating words in answer",
    )
    if len(reasons) == 1 and any(r in reasons for r in auto_ignore_reasons):
        datahandling.add_auto_ignored_post(
            (post.post_id, post.post_site, datetime.now()))
    if why is not None and why != "":
        datahandling.add_why(post.post_site, post.post_id, why)
    # Compare by value: identity against a string literal depends on interning.
    if post.is_answer and post.post_id is not None and post.post_id != "":
        datahandling.add_post_site_id_link(
            (post.post_id, post.post_site, "answer"), post.parent.post_id)
    try:
        # If the post is an answer type post, the 'title' is going to be blank, so when posting the
        # message contents we need to set the post title to the *parent* title, so the message in the
        # chat is properly constructed with parent title instead. This will make things 'print'
        # in a proper way in chat messages.
        sanitized_title = parsing.sanitize_title(
            post.title if not post.is_answer else post.parent.title)

        prefix = u"[ [SmokeDetector](//goo.gl/eLDYqh) ]"
        if GlobalVars.metasmoke_key:
            prefix_ms = u"[ [SmokeDetector](//goo.gl/eLDYqh) | [MS](//m.erwaysoftware.com/posts/uid/{}/{}) ]".format(
                api_parameter_from_link(post_url), post.post_id)
        else:
            prefix_ms = prefix

        # We'll insert reason list later
        if not post.user_name.strip() or (not poster_url
                                          or poster_url.strip() == ""):
            s = u" {{}}: [{}]({}) by a deleted user on `{}`".format(
                sanitized_title, post_url, shortened_site)
            username = ""
        else:
            s = u" {{}}: [{}]({}) by [{}]({}) on `{}`".format(
                sanitized_title, post_url, post.user_name.strip(), poster_url,
                shortened_site)
            username = post.user_name.strip()

        Tasks.do(metasmoke.Metasmoke.send_stats_on_post,
                 post.title_ignore_type, post_url, reasons, post.body,
                 username, post.user_link, why, post.owner_rep,
                 post.post_score, post.up_vote_count, post.down_vote_count)

        log('debug',
            GlobalVars.parser.unescape(s).encode('ascii', errors='replace'))
        GlobalVars.deletion_watcher.subscribe(post_url)

        reason = message = None
        for reason_count in range(5, 2, -1):  # Try 5 reasons, then 4, then 3
            reason = ", ".join(reasons[:reason_count])
            if len(reasons) > reason_count:
                reason += ", +{} more".format(len(reasons) - reason_count)
            reason = reason[:1].upper() + reason[
                1:]  # reason is capitalised, unlike the entries of reasons list
            message = prefix_ms + s.format(reason)  # Insert reason list
            if len(message) <= 500:
                break  # Problem solved, stop attempting

        s = s.format(reason)  # Later code needs this variable
        if len(message) > 500:
            message = (prefix_ms +
                       s)[:500]  # Truncate directly and keep MS link

        without_roles = tuple(
            "no-" + r for r in reasons) + ("site-no-" + post.post_site, )

        if set(reasons) - GlobalVars.experimental_reasons == set():
            chatcommunicate.tell_rooms(message, ("experimental", ),
                                       without_roles,
                                       notify_site=post.post_site,
                                       report_data=(post_url, poster_url))
        else:
            chatcommunicate.tell_rooms(message,
                                       ("all", "site-" + post.post_site),
                                       without_roles,
                                       notify_site=post.post_site,
                                       report_data=(post_url, poster_url))
    except Exception:
        # Catch Exception, not everything, so KeyboardInterrupt/SystemExit
        # are not routed into the exception reporter.
        excepthook.uncaught_exception(*sys.exc_info())
    def make_api_call_for_site(self, site):
        with self.queue_lock:
            new_posts = self.queue.pop(site, None)
        if new_posts is None:
            # site was not in the queue
            return
        Tasks.do(store_bodyfetcher_queue)

        new_post_ids = [int(k) for k in new_posts.keys()]

        if GlobalVars.flovis is not None:
            for post_id in new_post_ids:
                GlobalVars.flovis.stage('bodyfetcher/api_request', site,
                                        post_id, {
                                            'site': site,
                                            'posts': list(new_posts.keys())
                                        })

        # Add queue timing data
        pop_time = datetime.utcnow()
        post_add_times = [(pop_time - v).total_seconds()
                          for k, v in new_posts.items()]
        Tasks.do(add_queue_timing_data, site, post_add_times)

        store_max_ids = False
        with self.max_ids_modify_lock:
            if site in self.previous_max_ids and max(
                    new_post_ids) > self.previous_max_ids[site]:
                previous_max_id = self.previous_max_ids[site]
                intermediate_posts = range(previous_max_id + 1,
                                           max(new_post_ids))

                # We don't want to go over the 100-post API cutoff, so take the last
                # (100-len(new_post_ids)) from intermediate_posts

                intermediate_posts = intermediate_posts[-(100 -
                                                          len(new_post_ids)):]

                # new_post_ids could contain edited posts, so merge it back in
                combined = chain(intermediate_posts, new_post_ids)

                # Could be duplicates, so uniquify
                posts = list(set(combined))
            else:
                posts = new_post_ids

            new_post_ids_max = max(new_post_ids)
            if new_post_ids_max > self.previous_max_ids.get(site, 0):
                self.previous_max_ids[site] = new_post_ids_max
                store_max_ids = True

        if store_max_ids:
            schedule_store_bodyfetcher_max_ids()

        log('debug', "New IDs / Hybrid Intermediate IDs for {}:".format(site))
        if len(new_post_ids) > 30:
            log(
                'debug', "{} +{} more".format(
                    sorted(new_post_ids)[:30],
                    len(new_post_ids) - 30))
        else:
            log('debug', sorted(new_post_ids))
        if len(new_post_ids) == len(posts):
            log('debug', "[ *Identical* ]")
        elif len(posts) > 30:
            log('debug',
                "{} +{} more".format(sorted(posts)[:30],
                                     len(posts) - 30))
        else:
            log('debug', sorted(posts))

        question_modifier = ""
        pagesize_modifier = {}

        if site == "stackoverflow.com":
            # Not all SO questions are shown in the realtime feed. We now
            # fetch all recently modified SO questions to work around that.
            with self.last_activity_date_lock:
                if self.last_activity_date != 0:
                    pagesize = "100"
                else:
                    pagesize = "50"

                pagesize_modifier = {
                    'pagesize':
                    pagesize,
                    'min':
                    str(self.last_activity_date -
                        self.ACTIVITY_DATE_EXTRA_EARLIER_MS_TO_FETCH)
                }
        else:
            question_modifier = "/{0}".format(";".join(
                [str(post) for post in posts]))

        url = "https://api.stackexchange.com/2.2/questions{}".format(
            question_modifier)
        params = {
            'filter':
            '!1rs)sUKylwB)8isvCRk.xNu71LnaxjnPS12*pX*CEOKbPFwVFdHNxiMa7GIVgzDAwMa',
            'key': 'IAkbitmze4B8KpacUfLqkw((',
            'site': site
        }
        params.update(pagesize_modifier)

        # wait to make sure API has/updates post data
        time.sleep(3)

        with GlobalVars.api_request_lock:
            # Respect backoff, if we were given one
            if GlobalVars.api_backoff_time > time.time():
                time.sleep(GlobalVars.api_backoff_time - time.time() + 2)
            try:
                time_request_made = datetime.utcnow().strftime('%H:%M:%S')
                response = requests.get(url, params=params, timeout=20).json()
            except (requests.exceptions.Timeout, requests.ConnectionError,
                    Exception):
                # Any failure in the request being made (timeout or otherwise) should be added back to
                # the queue.
                with self.queue_lock:
                    if site in self.queue:
                        self.queue[site].update(new_posts)
                    else:
                        self.queue[site] = new_posts
                return

            with self.api_data_lock:
                add_or_update_api_data(site)

            message_hq = ""
            with GlobalVars.apiquota_rw_lock:
                if "quota_remaining" in response:
                    quota_remaining = response["quota_remaining"]
                    if quota_remaining - GlobalVars.apiquota >= 5000 and GlobalVars.apiquota >= 0 \
                            and quota_remaining > 39980:
                        tell_rooms_with(
                            "debug",
                            "API quota rolled over with {0} requests remaining. "
                            "Current quota: {1}.".format(
                                GlobalVars.apiquota, quota_remaining))

                        sorted_calls_per_site = sorted(
                            GlobalVars.api_calls_per_site.items(),
                            key=itemgetter(1),
                            reverse=True)
                        api_quota_used_per_site = ""
                        for site_name, quota_used in sorted_calls_per_site:
                            sanatized_site_name = site_name.replace(
                                '.com', '').replace('.stackexchange', '')
                            api_quota_used_per_site += sanatized_site_name + ": {0}\n".format(
                                str(quota_used))
                        api_quota_used_per_site = api_quota_used_per_site.strip(
                        )

                        tell_rooms_with("debug", api_quota_used_per_site)
                        clear_api_data()
                    if quota_remaining == 0:
                        tell_rooms_with(
                            "debug",
                            "API reports no quota left!  May be a glitch.")
                        tell_rooms_with(
                            "debug", str(response))  # No code format for now?
                    if GlobalVars.apiquota == -1:
                        tell_rooms_with(
                            "debug", "Restart: API quota is {quota}.".format(
                                quota=quota_remaining))
                    GlobalVars.apiquota = quota_remaining
                else:
                    message_hq = "The quota_remaining property was not in the API response."

            if "error_message" in response:
                message_hq += " Error: {} at {} UTC.".format(
                    response["error_message"], time_request_made)
                if "error_id" in response and response["error_id"] == 502:
                    if GlobalVars.api_backoff_time < time.time(
                    ) + 12:  # Add a backoff of 10 + 2 seconds as a default
                        GlobalVars.api_backoff_time = time.time() + 12
                message_hq += " Backing off on requests for the next 12 seconds."
                message_hq += " Previous URL: `{}`".format(url)

            if "backoff" in response:
                if GlobalVars.api_backoff_time < time.time(
                ) + response["backoff"]:
                    GlobalVars.api_backoff_time = time.time(
                    ) + response["backoff"]

        if len(message_hq) > 0 and "site is required" not in message_hq:
            message_hq = message_hq.strip()
            if len(message_hq) > 500:
                message_hq = "\n" + message_hq
            tell_rooms_with("debug", message_hq)

        if "items" not in response:
            return

        if site == "stackoverflow.com":
            items = response["items"]
            if len(items) > 0 and "last_activity_date" in items[0]:
                with self.last_activity_date_lock:
                    self.last_activity_date = items[0]["last_activity_date"]

        num_scanned = 0
        start_time = time.time()

        for post in response["items"]:
            if GlobalVars.flovis is not None:
                pnb = copy.deepcopy(post)
                if 'body' in pnb:
                    pnb['body'] = 'Present, but truncated'
                if 'answers' in pnb:
                    del pnb['answers']

            if "title" not in post or "body" not in post:
                if GlobalVars.flovis is not None and 'question_id' in post:
                    GlobalVars.flovis.stage(
                        'bodyfetcher/api_response/no_content', site,
                        post['question_id'], pnb)
                continue

            post['site'] = site
            try:
                post['edited'] = (post['creation_date'] !=
                                  post['last_edit_date'])
            except KeyError:
                post[
                    'edited'] = False  # last_edit_date not present = not edited

            question_doesnt_need_scan = is_post_recently_scanned_and_unchanged(
                post)
            add_recently_scanned_post(post)
            if not question_doesnt_need_scan:
                try:
                    post_ = Post(api_response=post)
                except PostParseError as err:
                    log(
                        'error', 'Error {0} when parsing post: {1!r}'.format(
                            err, post_))
                    if GlobalVars.flovis is not None and 'question_id' in post:
                        GlobalVars.flovis.stage(
                            'bodyfetcher/api_response/error', site,
                            post['question_id'], pnb)
                    continue

                num_scanned += 1

                is_spam, reason, why = check_if_spam(post_)

                if is_spam:
                    try:
                        if GlobalVars.flovis is not None and 'question_id' in post:
                            GlobalVars.flovis.stage(
                                'bodyfetcher/api_response/spam', site,
                                post['question_id'], {
                                    'post': pnb,
                                    'check_if_spam': [is_spam, reason, why]
                                })
                        handle_spam(post=post_, reasons=reason, why=why)
                    except Exception as e:
                        log('error', "Exception in handle_spam:", e)
                elif GlobalVars.flovis is not None and 'question_id' in post:
                    GlobalVars.flovis.stage(
                        'bodyfetcher/api_response/not_spam', site,
                        post['question_id'], {
                            'post': pnb,
                            'check_if_spam': [is_spam, reason, why]
                        })

            try:
                if "answers" not in post:
                    pass
                else:
                    for answer in post["answers"]:
                        if GlobalVars.flovis is not None:
                            anb = copy.deepcopy(answer)
                            if 'body' in anb:
                                anb['body'] = 'Present, but truncated'

                        num_scanned += 1
                        answer["IsAnswer"] = True  # Necesssary for Post object
                        answer[
                            "title"] = ""  # Necessary for proper Post object creation
                        answer[
                            "site"] = site  # Necessary for proper Post object creation
                        try:
                            answer['edited'] = (answer['creation_date'] !=
                                                answer['last_edit_date'])
                        except KeyError:
                            answer[
                                'edited'] = False  # last_edit_date not present = not edited
                        answer_doesnt_need_scan = is_post_recently_scanned_and_unchanged(
                            answer)
                        add_recently_scanned_post(answer)
                        if answer_doesnt_need_scan:
                            continue
                        answer_ = Post(api_response=answer, parent=post_)

                        is_spam, reason, why = check_if_spam(answer_)
                        if is_spam:
                            try:
                                if GlobalVars.flovis is not None and 'answer_id' in answer:
                                    GlobalVars.flovis.stage(
                                        'bodyfetcher/api_response/spam', site,
                                        answer['answer_id'], {
                                            'post': anb,
                                            'check_if_spam':
                                            [is_spam, reason, why]
                                        })
                                handle_spam(answer_, reasons=reason, why=why)
                            except Exception as e:
                                log('error', "Exception in handle_spam:", e)
                        elif GlobalVars.flovis is not None and 'answer_id' in answer:
                            GlobalVars.flovis.stage(
                                'bodyfetcher/api_response/not_spam', site,
                                answer['answer_id'], {
                                    'post': anb,
                                    'check_if_spam': [is_spam, reason, why]
                                })

            except Exception as e:
                log('error', "Exception handling answers:", e)

        end_time = time.time()
        scan_time = end_time - start_time
        GlobalVars.PostScanStat.add_stat(num_scanned, scan_time)
        return
def handle_spam(post, reasons, why):
    """Record a detected post and report it to chat and metasmoke.

    The bookkeeping calls into ``datahandling`` run first and are not guarded;
    building and sending the report happens inside a ``try`` whose failures are
    handed to ``excepthook.uncaught_exception`` instead of propagating.

    :param post: detected post object. Attributes read here: ``post_url``,
        ``user_url``, ``post_site``, ``post_id``, ``title``, ``is_answer``,
        ``parent``, ``user_name``, ``title_ignore_type``, ``body``,
        ``user_link``, ``owner_rep``, ``post_score``, ``up_vote_count`` and
        ``down_vote_count``. ``post._title`` is overwritten with the escaped title.
    :param reasons: sequence of detection reason strings (it is sliced and
        measured with ``len``, so it must be a list or tuple).
    :param why: detail text stored via ``datahandling.add_why``; skipped when
        ``None`` or empty.
    :return: ``None``.
    """
    post_url = parsing.to_protocol_relative(parsing.url_to_shortlink(post.post_url))
    poster_url = parsing.to_protocol_relative(parsing.user_url_to_shortlink(post.user_url))

    # Only the first five reasons are spelled out in chat; the rest become a count.
    reason = ", ".join(reasons[:5])
    if len(reasons) > 5:
        reason += ", +{} more".format(len(reasons) - 5)
    reason = reason[:1].upper() + reason[1:]  # reason is capitalised, unlike the entries of reasons list
    shortened_site = post.post_site.replace("stackexchange.com", "SE")  # site.stackexchange.com -> site.SE

    datahandling.append_to_latest_questions(post.post_site, post.post_id, post.title if not post.is_answer else "")

    # A post detected for exactly one of these reasons is added to the auto-ignored list.
    auto_ignored_reasons = (
        "all-caps title",
        "repeating characters in title",
        "repeating characters in body",
        "repeating characters in answer",
        "repeating words in title",
        "repeating words in body",
        "repeating words in answer",
    )
    if len(reasons) == 1 and reasons[0] in auto_ignored_reasons:
        datahandling.add_auto_ignored_post((post.post_id, post.post_site, datetime.now()))

    if why is not None and why != "":
        datahandling.add_why(post.post_site, post.post_id, why)

    # Compare by value: an identity test against a string literal is unreliable.
    if post.is_answer and post.post_id is not None and post.post_id != "":
        datahandling.add_post_site_id_link((post.post_id, post.post_site, "answer"), post.parent.post_id)

    try:
        post._title = parsing.escape_special_chars_in_title(post.title)

        # If the post is an answer type post, the 'title' is going to be blank, so when posting the
        # message contents we need to set the post title to the *parent* title, so the message in the
        # chat is properly constructed with parent title instead. This will make things 'print'
        # in a proper way in chat messages.
        title_source = post.parent.title if post.is_answer else post.title
        sanitized_title = regex.sub('(https?://|\n)', '', title_source)

        # Backslash-escape characters that would break the chat link markup.
        # NOTE: newlines were already stripped by the substitution above, so the replace() never matches.
        sanitized_title = regex.sub(r'([\]*`])', r'\\\1', sanitized_title).replace('\n', u'\u23CE')

        prefix = u"[ [SmokeDetector](//goo.gl/eLDYqh) ]"
        if GlobalVars.metasmoke_key:
            prefix_ms = u"[ [SmokeDetector](//goo.gl/eLDYqh) | [MS](//m.erwaysoftware.com/posts/by-url?url=" + \
                        post_url + ") ]"
        else:
            prefix_ms = prefix

        if not post.user_name.strip() or (not poster_url or poster_url.strip() == ""):
            s = u" {}: [{}]({}) by a deleted user on `{}`".format(reason, sanitized_title.strip(), post_url,
                                                                  shortened_site)
            username = ""
        else:
            s = u" {}: [{}]({}) by [{}]({}) on `{}`".format(reason, sanitized_title.strip(), post_url,
                                                            post.user_name.strip(), poster_url, shortened_site)
            username = post.user_name.strip()

        Tasks.do(metasmoke.Metasmoke.send_stats_on_post,
                 post.title_ignore_type, post_url, reasons, post.body, username,
                 post.user_link, why, post.owner_rep, post.post_score,
                 post.up_vote_count, post.down_vote_count)

        log('debug', GlobalVars.parser.unescape(s).encode('ascii', errors='replace'))
        datahandling.append_to_latest_questions(post.post_site, post.post_id, post.title)

        # Fall back to the short prefix (and hard-truncate) when the message exceeds 500 characters.
        message = prefix_ms + s
        if len(message) > 500:
            message = (prefix + s)[:500]

        without_roles = tuple("no-" + entry for entry in reasons)

        # When every reason is experimental, report only to rooms with the "experimental" role.
        # The previous test compared a set of the *characters* of `reason` against `{}` (a dict),
        # which is never equal, so this branch was unreachable. An empty `reasons` keeps the
        # default routing.
        only_experimental = bool(reasons) and set(reasons).issubset(GlobalVars.experimental_reasons)
        if only_experimental:
            target_roles = ("experimental",)  # trailing comma: a one-element tuple, not a bare string
        else:
            target_roles = ("all", "site-" + post.post_site)

        chatcommunicate.tell_rooms(message, target_roles, without_roles,
                                   notify_site=post.post_site, report_data=(post_url, poster_url))
    except BaseException:
        # Deliberately as broad as the former bare `except:`; everything is handed to the exception hook.
        exc_type, exc_obj, exc_tb = sys.exc_info()
        excepthook.uncaught_exception(exc_type, exc_obj, exc_tb)
Beispiel #33
0
def handle_spam(post, reasons, why):
    """Record a detected post and report it to chat and metasmoke.

    The bookkeeping calls into ``datahandling`` run first and are not guarded;
    building and sending the report happens inside a ``try`` whose failures are
    handed to ``excepthook.uncaught_exception`` instead of propagating.

    :param post: detected post object. Attributes read here: ``post_url``,
        ``user_url``, ``post_site``, ``post_id``, ``title``, ``is_answer``,
        ``parent``, ``user_name``, ``title_ignore_type``, ``body``,
        ``user_link``, ``owner_rep``, ``post_score``, ``up_vote_count`` and
        ``down_vote_count``. ``post._title`` is overwritten with the escaped
        title.
    :param reasons: sequence of detection reason strings (it is sliced and
        measured with ``len``, so it must be a list or tuple).
    :param why: detail text stored via ``datahandling.add_why``; skipped when
        ``None`` or empty.
    :return: ``None``.
    """
    post_url = parsing.to_protocol_relative(
        parsing.url_to_shortlink(post.post_url))
    poster_url = parsing.to_protocol_relative(
        parsing.user_url_to_shortlink(post.user_url))

    # Only the first five reasons are spelled out in chat; the rest become a
    # count.
    reason = ", ".join(reasons[:5])
    if len(reasons) > 5:
        reason += ", +{} more".format(len(reasons) - 5)
    # reason is capitalised, unlike the entries of reasons list
    reason = reason[:1].upper() + reason[1:]
    # site.stackexchange.com -> site.SE
    shortened_site = post.post_site.replace("stackexchange.com", "SE")

    datahandling.append_to_latest_questions(
        post.post_site, post.post_id, post.title if not post.is_answer else "")

    # A post detected for exactly one of these reasons is added to the
    # auto-ignored list.
    auto_ignored_reasons = (
        "all-caps title",
        "repeating characters in title",
        "repeating characters in body",
        "repeating characters in answer",
        "repeating words in title",
        "repeating words in body",
        "repeating words in answer",
    )
    if len(reasons) == 1 and reasons[0] in auto_ignored_reasons:
        datahandling.add_auto_ignored_post(
            (post.post_id, post.post_site, datetime.now()))

    if why is not None and why != "":
        datahandling.add_why(post.post_site, post.post_id, why)

    # Compare by value: an identity test against a string literal is
    # unreliable.
    if post.is_answer and post.post_id is not None and post.post_id != "":
        datahandling.add_post_site_id_link(
            (post.post_id, post.post_site, "answer"), post.parent.post_id)

    try:
        post._title = parsing.escape_special_chars_in_title(post.title)

        # If the post is an answer type post, the 'title' is going to be blank, so when posting the
        # message contents we need to set the post title to the *parent* title, so the message in the
        # chat is properly constructed with parent title instead. This will make things 'print'
        # in a proper way in chat messages.
        title_source = post.parent.title if post.is_answer else post.title
        sanitized_title = regex.sub('(https?://|\n)', '', title_source)

        # Backslash-escape characters that would break the chat link markup.
        # NOTE: newlines were already stripped by the substitution above, so
        # the replace() never matches.
        sanitized_title = regex.sub(r'([\]*`])', r'\\\1',
                                    sanitized_title).replace('\n', u'\u23CE')

        prefix = u"[ [SmokeDetector](//goo.gl/eLDYqh) ]"
        if GlobalVars.metasmoke_key:
            prefix_ms = u"[ [SmokeDetector](//goo.gl/eLDYqh) | [MS](//m.erwaysoftware.com/posts/by-url?url=" + \
                        post_url + ") ]"
        else:
            prefix_ms = prefix

        if not post.user_name.strip() or (not poster_url
                                          or poster_url.strip() == ""):
            s = u" {}: [{}]({}) by a deleted user on `{}`".format(
                reason, sanitized_title.strip(), post_url, shortened_site)
            username = ""
        else:
            s = u" {}: [{}]({}) by [{}]({}) on `{}`".format(
                reason, sanitized_title.strip(), post_url,
                post.user_name.strip(), poster_url, shortened_site)
            username = post.user_name.strip()

        Tasks.do(metasmoke.Metasmoke.send_stats_on_post,
                 post.title_ignore_type, post_url, reasons, post.body,
                 username, post.user_link, why, post.owner_rep,
                 post.post_score, post.up_vote_count, post.down_vote_count)

        log('debug',
            GlobalVars.parser.unescape(s).encode('ascii', errors='replace'))
        datahandling.append_to_latest_questions(post.post_site, post.post_id,
                                                post.title)

        # Fall back to the short prefix (and hard-truncate) when the message
        # exceeds 500 characters.
        message = prefix_ms + s
        if len(message) > 500:
            message = (prefix + s)[:500]

        # One "no-<reason>" role per reason, plus a per-site exclusion role.
        without_roles = tuple(
            "no-" + entry
            for entry in reasons) + ("site-no-" + post.post_site, )

        # When every reason is experimental, report only to rooms with the
        # "experimental" role. The previous test compared a set of the
        # *characters* of `reason` against `{}` (a dict), which is never
        # equal, so this branch was unreachable. An empty `reasons` keeps the
        # default routing.
        only_experimental = bool(reasons) and set(reasons).issubset(
            GlobalVars.experimental_reasons)
        if only_experimental:
            # Trailing comma: a one-element tuple, not a bare string.
            target_roles = ("experimental", )
        else:
            target_roles = ("all", "site-" + post.post_site)

        chatcommunicate.tell_rooms(message,
                                   target_roles,
                                   without_roles,
                                   notify_site=post.post_site,
                                   report_data=(post_url, poster_url))
    except BaseException:
        # Deliberately as broad as the former bare `except:`; everything is
        # handed to the exception hook.
        exc_type, exc_obj, exc_tb = sys.exc_info()
        excepthook.uncaught_exception(exc_type, exc_obj, exc_tb)
Beispiel #34
0
def schedule_store_bodyfetcher_max_ids():
    """Queue ``store_bodyfetcher_max_ids`` through ``Tasks.do``, replacing any earlier handle.

    While holding ``bodyfetcher_max_ids_save_handle_lock``, the handle kept in
    the module-level ``bodyfetcher_max_ids_save_handle`` is cancelled if it is
    truthy, and the handle returned by the new ``Tasks.do`` call is stored in
    its place.
    """
    global bodyfetcher_max_ids_save_handle

    with bodyfetcher_max_ids_save_handle_lock:
        previous_handle = bodyfetcher_max_ids_save_handle
        if previous_handle:
            # Drop the earlier request so only the newest one stays registered.
            previous_handle.cancel()

        new_handle = Tasks.do(store_bodyfetcher_max_ids)
        bodyfetcher_max_ids_save_handle = new_handle