Example #1
    def make_api_call_for_site(self, site):
        posts = self.queue.pop(site)
        url = "http://api.stackexchange.com/2.2/questions/" + ";".join(str(x) for x in posts) + "?site=" + site + "&filter=!4y_-sca-)pfAwlmP_1FxC6e5yzutRIcQvonAiP&key=IAkbitmze4B8KpacUfLqkw(("
        # wait to make sure API has/updates post data
        time.sleep(60)
        try:
            response = requests.get(url, timeout=20).json()
        except requests.exceptions.Timeout:
            return  # could add some retrying logic here, but eh.

        GlobalVars.apiquota = response["quota_remaining"]

        for post in response["items"]:
            title = GlobalVars.parser.unescape(post["title"])
            body = GlobalVars.parser.unescape(post["body"])
            link = post["link"]
            try:
                owner_name = GlobalVars.parser.unescape(post["owner"]["display_name"])
                owner_link = post["owner"]["link"]
                owner_rep = post["owner"]["reputation"]
            except:
                owner_name = ""
                owner_link = ""
                owner_rep = 0
            q_id = str(post["question_id"])

            is_spam, reason = check_if_spam(title, body, owner_name, owner_link, site, q_id, False, False)
            if owner_rep <= 50 and is_spam:
                try:
                    handle_spam(title, owner_name, site, link, owner_link, q_id, reason, False)
                except:
                    print "NOP"
            try:
                for answer in post["answers"]:
                    answer_title = ""
                    body = answer["body"]
                    print "got answer from owner with name " + owner_name
                    link = answer["link"]
                    a_id = str(answer["answer_id"])
                    try:
                        owner_name = GlobalVars.parser.unescape(answer["owner"]["display_name"])
                        owner_link = answer["owner"]["link"]
                        owner_rep = answer["owner"]["reputation"]
                    except:
                        owner_name = ""
                        owner_link = ""
                        owner_rep = 0

                    is_spam, reason = check_if_spam(answer_title, body, owner_name, owner_link, site, a_id, True, False)
                    if owner_rep <= 50 and is_spam:
                        try:
                            handle_spam(title, owner_name, site, link, owner_link, a_id, reason, True)
                        except:
                            print "NOP"
            except:
                print "no answers"
        return
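The Timeout handler above simply gives up ("could add some retrying logic here, but eh."). A minimal sketch of what such a retry helper might look like, using only the requests library; fetch_json_with_retries and its parameters are hypothetical and not part of SmokeDetector:

import time

import requests


def fetch_json_with_retries(url, attempts=3, timeout=20, delay=5):
    # Hypothetical helper: retry the GET a few times before giving up.
    for attempt in range(attempts):
        try:
            return requests.get(url, timeout=timeout).json()
        except (requests.exceptions.Timeout, requests.exceptions.ConnectionError):
            if attempt == attempts - 1:
                return None  # caller treats None as "no data this round"
            time.sleep(delay)  # short pause before the next attempt

make_api_call_for_site could then call this helper instead of requests.get directly and return early when it yields None.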
Example #2
    def test_handle_spam_repeating_characters(cls):
        post = cls.mock_post(title='aaaaaaaaaaaaaa')
        is_spam, reasons, why = check_if_spam(post)
        handle_spam(post=post, reasons=reasons, why=why)
        chatcommunicate.tell_rooms.assert_called_once_with(
            Matcher(containing='aaaaaaaaaaaaaa', without='Potentially offensive title'),
            ANY,
            ANY,
            notify_site=ANY,
            report_data=ANY
        )
Example #3
def test_handle_spam_offensive_title():
    GlobalVars.deletion_watcher = MagicMock(
    )  # Mock the deletion watcher in test
    chatcommunicate.tell_rooms = MagicMock()
    post = mock_post(title='f**k')
    is_spam, reasons, why = check_if_spam(post)
    handle_spam(post, reasons, why)
    chatcommunicate.tell_rooms.assert_called_once_with(StringMatcher(
        containing='Potentially offensive title', without='f**k'),
                                                       ANY,
                                                       ANY,
                                                       notify_site=ANY,
                                                       report_data=ANY)
Example #4
    def test_handle_spam_offensive_title(cls):
        post = cls.mock_post(title='f**k')
        is_spam, reasons, why = check_if_spam(post)
        handle_spam(post=post, reasons=reasons, why=why)
        call_a = call(
            Matcher(containing='f**k', without='potentially offensive title'),
            ANY,
            Matcher(containing='offensive-mask', without='no-offensive-mask'),
            notify_site=ANY,
            report_data=ANY
        )
        call_b = call(
            Matcher(containing='potentially offensive title', without='f**k'),
            ANY,
            Matcher(containing='no-offensive-mask', without='offensive-mask'),
            notify_site=ANY,
            report_data=ANY
        )
        chatcommunicate.tell_rooms.assert_has_calls([call_a, call_b])
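Both tests above pass Matcher (or StringMatcher in Example #3) objects as expected arguments to unittest.mock assertions. A sketch of how such an equality-based argument matcher is typically written; the class below is illustrative and only assumes roughly the behaviour the test helpers rely on, it is not the project's actual implementation:

class Matcher:
    # Illustrative matcher: compares equal to any string that contains
    # `containing` and does not contain `without`.
    def __init__(self, containing=None, without=None):
        self.containing = containing
        self.without = without

    def __eq__(self, other):
        if not isinstance(other, str):
            return False
        if self.containing is not None and self.containing not in other:
            return False
        if self.without is not None and self.without in other:
            return False
        return True

Because unittest.mock compares expected and actual call arguments with ==, assert_called_once_with and assert_has_calls succeed whenever the real chat message satisfies the matcher.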
Example #5
    def handle_websocket_data(data):
        if "message" not in data:
            return
        message = data['message']
        if not isinstance(message, Iterable):
            return

        if "message" in message:
            chatcommunicate.tell_rooms_with("metasmoke", message['message'])
        elif "autoflag_fp" in message:
            event = message["autoflag_fp"]

            chatcommunicate.tell_rooms(event["message"],
                                       ("debug", "site-" + event["site"]),
                                       ("no-site-" + event["site"], ),
                                       notify_site="/autoflag_fp")
        elif "exit" in message:
            os._exit(message["exit"])
        elif "blacklist" in message:
            ids = (message['blacklist']['uid'], message['blacklist']['site'])

            datahandling.add_blacklisted_user(ids, "metasmoke",
                                              message['blacklist']['post'])
            datahandling.last_feedbacked = (ids, time.time() + 60)
        elif "unblacklist" in message:
            ids = (message['unblacklist']['uid'],
                   message['unblacklist']['site'])
            datahandling.remove_blacklisted_user(ids)
        elif "naa" in message:
            post_site_id = parsing.fetch_post_id_and_site_from_url(
                message["naa"]["post_link"])
            datahandling.add_ignored_post(post_site_id[0:2])
        elif "fp" in message:
            post_site_id = parsing.fetch_post_id_and_site_from_url(
                message["fp"]["post_link"])
            datahandling.add_false_positive(post_site_id[0:2])
        elif "report" in message:
            import chatcommands  # Do it here
            chatcommands.report_posts([message["report"]["post_link"]],
                                      "the metasmoke API", None,
                                      "the metasmoke API")
            return
            post_data = apigetpost.api_get_post(message["report"]["post_link"])
            if post_data is None or post_data is False:
                return
            if datahandling.has_already_been_posted(post_data.site, post_data.post_id, post_data.title) \
                    and not datahandling.is_false_positive((post_data.post_id, post_data.site)):
                return
            user = parsing.get_user_from_url(post_data.owner_url)
            post = classes.Post(api_response=post_data.as_dict)

            scan_spam, scan_reasons, scan_why = spamhandling.check_if_spam(
                post)
            if scan_spam:
                why_append = u"This post would have also been caught for: " + \
                    u", ".join(scan_reasons).capitalize() + "\n" + scan_why
            else:
                why_append = u"This post would not have been caught otherwise."

            # Add user to blacklist *after* post is scanned
            if user is not None:
                datahandling.add_blacklisted_user(user, "metasmoke",
                                                  post_data.post_url)

            why = u"Post manually reported by user *{}* from metasmoke.\n\n{}".format(
                message["report"]["user"], why_append)

            spamhandling.handle_spam(
                post=post,
                reasons=["Manually reported " + post_data.post_type],
                why=why)
        elif "deploy_updated" in message:
            return  # Disabled
            sha = message["deploy_updated"]["head_commit"]["id"]
            if sha != os.popen('git log -1 --pretty="%H"').read():
                if "autopull" in message["deploy_updated"]["head_commit"][
                        "message"]:
                    if only_blacklists_changed(GitManager.get_remote_diff()):
                        commit_md = "[`{0}`](https://github.com/{1}/commit/{0})" \
                                    .format(sha[:7], GlobalVars.bot_repo_slug)
                        integrity = blacklist_integrity_check()
                        if len(integrity) == 0:  # No issues
                            GitManager.pull_remote()
                            findspam.reload_blacklists()
                            chatcommunicate.tell_rooms_with(
                                "debug",
                                "No code modified in {0}, only blacklists"
                                " reloaded.".format(commit_md))
                        else:
                            integrity.append("please fix before pulling.")
                            chatcommunicate.tell_rooms_with(
                                "debug", ", ".join(integrity))
        elif "commit_status" in message:
            c = message["commit_status"]
            sha = c["commit_sha"][:7]
            if c["commit_sha"] == sp.check_output(
                ["git", "log", "-1", "--pretty=%H"]).decode('utf-8').strip():
                return

            if c["status"] == "success":
                if "autopull" in c["commit_message"]:
                    s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/{repo}/" \
                        "commit/{commit_sha}) succeeded. Message contains 'autopull', pulling...".format(
                            ci_link=c["ci_url"], repo=GlobalVars.bot_repo_slug, commit_sha=sha)
                    remote_diff = GitManager.get_remote_diff()
                    if only_blacklists_changed(remote_diff):
                        GitManager.pull_remote()
                        if not GlobalVars.on_master:
                            # Restart if HEAD detached
                            log('warning',
                                "Pulling remote with HEAD detached, checkout deploy",
                                f=True)
                            os._exit(8)
                        GlobalVars.reload()
                        findspam.FindSpam.reload_blacklists()
                        chatcommunicate.tell_rooms_with(
                            'debug', GlobalVars.s_norestart)
                    elif only_modules_changed(remote_diff):
                        GitManager.pull_remote()
                        if not GlobalVars.on_master:
                            # Restart if HEAD detached
                            log('warning',
                                "Pulling remote with HEAD detached, checkout deploy",
                                f=True)
                            os._exit(8)
                        GlobalVars.reload()
                        reload_modules()
                        chatcommunicate.tell_rooms_with(
                            'debug', GlobalVars.s_norestart2)
                    else:
                        chatcommunicate.tell_rooms_with('debug',
                                                        s,
                                                        notify_site="/ci")
                        os._exit(3)
                else:
                    s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/{repo}/commit/{commit_sha}) " \
                        "succeeded.".format(ci_link=c["ci_url"], repo=GlobalVars.bot_repo_slug, commit_sha=sha)

                    chatcommunicate.tell_rooms_with("debug",
                                                    s,
                                                    notify_site="/ci")
            elif c["status"] == "failure":
                s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/{repo}/commit/{commit_sha}) " \
                    "failed.".format(ci_link=c["ci_url"], repo=GlobalVars.bot_repo_slug, commit_sha=sha)

                chatcommunicate.tell_rooms_with("debug", s, notify_site="/ci")
        elif "everything_is_broken" in message:
            if message["everything_is_broken"] is True:
                os._exit(6)
Example #6
def handle_commands(content_lower, message_parts, ev_room, ev_room_name, ev_user_id, ev_user_name, wrap2, content, message_id):
    message_url = "//chat." + wrap2.host + "/transcript/message/" + str(message_id)
    second_part_lower = "" if len(message_parts) < 2 else message_parts[1].lower()
    if second_part_lower == "f":
        second_part_lower = "fp-"
    if second_part_lower == "k":
        second_part_lower = "tpu-"
    if re.compile("^:[0-9]+$").search(message_parts[0]):
        msg_id = int(message_parts[0][1:])
        msg = wrap2.get_message(msg_id)
        msg_content = msg.content_source
        quiet_action = ("-" in second_part_lower)
        if str(msg.owner.id) != GlobalVars.smokeDetector_user_id[ev_room] or msg_content is None:
            return
        post_url = fetch_post_url_from_msg_content(msg_content)
        post_site_id = fetch_post_id_and_site_from_msg_content(msg_content)
        if post_site_id is not None:
            post_type = post_site_id[2]
        else:
            post_type = None
        if (second_part_lower.startswith("false") or second_part_lower.startswith("fp")) \
                and is_privileged(ev_room, ev_user_id, wrap2):
            if post_site_id is None:
                return "That message is not a report."

            t_metasmoke = Thread(target=Metasmoke.send_feedback_for_post,
                                 args=(post_url, second_part_lower, ev_user_name, ))
            t_metasmoke.start()

            add_false_positive((post_site_id[0], post_site_id[1]))
            user_added = False
            if second_part_lower.startswith("falseu") or second_part_lower.startswith("fpu"):
                url_from_msg = fetch_owner_url_from_msg_content(msg_content)
                if url_from_msg is not None:
                    user = get_user_from_url(url_from_msg)
                    if user is not None:
                        add_whitelisted_user(user)
                        user_added = True
            if post_type == "question":
                if user_added and not quiet_action:
                    return "Registered question as false positive and whitelisted user."
                elif not quiet_action:
                    return "Registered question as false positive."
            elif post_type == "answer":
                if user_added and not quiet_action:
                    return "Registered answer as false positive and whitelisted user."
                elif not quiet_action:
                    return "Registered answer as false positive."
            try:
                msg.delete()
            except:
                pass
        if (second_part_lower.startswith("true") or second_part_lower.startswith("tp")) \
                and is_privileged(ev_room, ev_user_id, wrap2):
            if post_site_id is None:
                return "That message is not a report."

            t_metasmoke = Thread(target=Metasmoke.send_feedback_for_post,
                                 args=(post_url, second_part_lower, ev_user_name, ))
            t_metasmoke.start()

            user_added = False
            if second_part_lower.startswith("trueu") or second_part_lower.startswith("tpu"):
                url_from_msg = fetch_owner_url_from_msg_content(msg_content)
                if url_from_msg is not None:
                    user = get_user_from_url(url_from_msg)
                    if user is not None:
                        add_blacklisted_user(user, message_url, "http:" + post_url)
                        user_added = True
            if post_type == "question":
                if not quiet_action:
                    if user_added:
                        return "Blacklisted user and registered question as true positive."
                    return "Recorded question as true positive in metasmoke. Use `tpu` or `trueu` if you want to blacklist a user."
                else:
                    return None
            elif post_type == "answer":
                if not quiet_action:
                    if user_added:
                        return "Blacklisted user."
                    return "Recorded answer as true positive in metasmoke. If you want to blacklist the poster of the answer, use `trueu` or `tpu`."
                else:
                    return None
        if second_part_lower.startswith("ignore") and is_privileged(ev_room, ev_user_id, wrap2):
            if post_site_id is None:
                return "That message is not a report."

            t_metasmoke = Thread(target=Metasmoke.send_feedback_for_post,
                                 args=(post_url, second_part_lower, ev_user_name, ))
            t_metasmoke.start()

            add_ignored_post(post_site_id[0:2])
            if not quiet_action:
                return "Post ignored; alerts about it will no longer be posted."
            else:
                return None
        if (second_part_lower.startswith("delete") or second_part_lower.startswith("remove") or second_part_lower.startswith("gone") or second_part_lower.startswith("poof") or
                second_part_lower == "del") and is_privileged(ev_room, ev_user_id, wrap2):
            try:
                msg.delete()
            except:
                pass  # couldn't delete message
        if second_part_lower.startswith("postgone") and is_privileged(ev_room, ev_user_id, wrap2):
            edited = edited_message_after_postgone_command(msg_content)
            if edited is None:
                return "That's not a report."
            msg.edit(edited)
            return None
        if second_part_lower.startswith("why"):
            t = fetch_post_id_and_site_from_msg_content(msg_content)
            if t is None:
                t = fetch_user_from_allspam_report(msg_content)
                if t is None:
                    return "That's not a report."
                why = get_why_allspam(t)
                if why is None or why == "":
                    return "There is no `why` data for that user (anymore)."
                else:
                    return why
            post_id, site, _ = t
            why = get_why(site, post_id)
            if why is None or why == "":
                return "There is no `why` data for that post (anymore)."
            else:
                return why
    if content_lower.startswith("!!/addblu") \
            and is_privileged(ev_room, ev_user_id, wrap2):
        uid, val = get_user_from_list_command(content_lower)
        if uid > -1 and val != "":
            add_blacklisted_user((uid, val), message_url, "")
            return "User blacklisted (`{}` on `{}`).".format(uid, val)
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return "Invalid format. Valid format: `!!/addblu profileurl` *or* `!!/addblu userid sitename`."
    if content_lower.startswith("!!/rmblu") \
            and is_privileged(ev_room, ev_user_id, wrap2):
        uid, val = get_user_from_list_command(content_lower)
        if uid > -1 and val != "":
            if remove_blacklisted_user((uid, val)):
                return "User removed from blacklist (`{}` on `{}`).".format(uid, val)
            else:
                return "User is not blacklisted."
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return False, "Invalid format. Valid format: `!!/rmblu profileurl` *or* `!!/rmblu userid sitename`."
    if content_lower.startswith("!!/isblu"):
        uid, val = get_user_from_list_command(content_lower)
        if uid > -1 and val != "":
            if is_blacklisted_user((uid, val)):
                return "User is blacklisted (`{}` on `{}`).".format(uid, val)
            else:
                return "User is not blacklisted (`{}` on `{}`).".format(uid, val)
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return False, "Invalid format. Valid format: `!!/isblu profileurl` *or* `!!/isblu userid sitename`."
    if content_lower.startswith("!!/addwlu") \
            and is_privileged(ev_room, ev_user_id, wrap2):
        uid, val = get_user_from_list_command(content_lower)
        if uid > -1 and val != "":
            add_whitelisted_user((uid, val))
            return "User whitelisted (`{}` on `{}`).".format(uid, val)
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return False, "Invalid format. Valid format: `!!/addwlu profileurl` *or* `!!/addwlu userid sitename`."
    if content_lower.startswith("!!/rmwlu") \
            and is_privileged(ev_room, ev_user_id, wrap2):
        uid, val = get_user_from_list_command(content_lower)
        if uid != -1 and val != "":
            if remove_whitelisted_user((uid, val)):
                return "User removed from whitelist (`{}` on `{}`).".format(uid, val)
            else:
                return "User is not whitelisted."
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return False, "Invalid format. Valid format: `!!/rmwlu profileurl` *or* `!!/rmwlu userid sitename`."
    if content_lower.startswith("!!/iswlu"):
        uid, val = get_user_from_list_command(content_lower)
        if uid > -1 and val != "":
            if is_whitelisted_user((uid, val)):
                return "User is whitelisted (`{}` on `{}`).".format(uid, val)
            else:
                return "User is not whitelisted (`{}` on `{}`).".format(uid, val)
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return False, "Invalid format. Valid format: `!!/iswlu profileurl` *or* `!!/iswlu userid sitename`."
    if content_lower.startswith("!!/report") \
            and is_privileged(ev_room, ev_user_id, wrap2):
        crn, wait = can_report_now(ev_user_id, wrap2.host)
        if not crn:
            return "You can execute the !!/report command again in {} seconds. " \
                   "To avoid one user sending lots of reports in a few commands and slowing SmokeDetector down due to rate-limiting, " \
                   "you have to wait 30 seconds after you've reported multiple posts using !!/report, even if your current command just has one URL. " \
                   "(Note that this timeout won't be applied if you only used !!/report for one post)".format(wait)
        if len(message_parts) < 2:
            return False, "Not enough arguments."
        output = []
        index = 0
        urls = list(set(message_parts[1:]))
        if len(urls) > 5:
            return False, "To avoid SmokeDetector reporting posts too slowly, " \
                          "you can report at most 5 posts at a time. " \
                          "This is to avoid SmokeDetector's chat messages getting rate-limited too much, " \
                          "which would slow down reports."
        for url in urls:
            index += 1
            post_data = api_get_post(url)
            if post_data is None:
                output.append("Post {}: That does not look like a valid post URL.".format(index))
                continue
            if post_data is False:
                output.append("Post {}: Could not find data for this post in the API. It may already have been deleted.".format(index))
                continue
            user = get_user_from_url(post_data.owner_url)
            if user is not None:
                add_blacklisted_user(user, message_url, post_data.post_url)
            why = u"Post manually reported by user *{}* in room *{}*.\n".format(ev_user_name, ev_room_name.decode('utf-8'))
            batch = ""
            if len(urls) > 1:
                batch = " (batch report: post {} out of {})".format(index, len(urls))
            handle_spam(post_data.title, post_data.body, post_data.owner_name, post_data.site, post_data.post_url,
                        post_data.owner_url, post_data.post_id, ["Manually reported " + post_data.post_type + batch],
                        post_data.post_type == "answer", why, post_data.owner_rep, post_data.score, post_data.up_vote_count,
                        post_data.down_vote_count)
        if 1 < len(urls) > len(output):
            add_or_update_multiple_reporter(ev_user_id, wrap2.host, time.time())
        if len(output) > 0:
            return os.linesep.join(output)
        else:
            return None
    if content_lower.startswith("!!/allspam") and is_privileged(ev_room, ev_user_id, wrap2):
        if len(message_parts) != 2:
            return False, "1 argument expected"
        url = message_parts[1]
        user = get_user_from_url(url)
        if user is None:
            return "That doesn't look like a valid user URL."
        why = u"User manually reported by *{}* in room *{}*.\n".format(ev_user_name, ev_room_name.decode('utf-8'))
        handle_user_with_all_spam(user, why)
    if content_lower.startswith("!!/wut"):
        return "Whaddya mean, 'wut'? Humans..."
    if content_lower.startswith("!!/lick"):
        return "*licks ice cream cone*"
    if content_lower.startswith("!!/alive"):
        if ev_room == GlobalVars.charcoal_room_id:
            return 'Of course'
        elif ev_room == GlobalVars.meta_tavern_room_id or ev_room == GlobalVars.socvr_room_id:
            return random.choice(['Yup', 'You doubt me?', 'Of course', '... did I miss something?',
                                  'plz send teh coffee',
                                  'Watching this endless list of new questions *never* gets boring',
                                  'Kinda sorta'])
    if content_lower.startswith("!!/rev") or content_lower.startswith("!!/ver"):
            return '[' + \
                GlobalVars.commit_with_author + \
                '](https://github.com/Charcoal-SE/SmokeDetector/commit/' + \
                GlobalVars.commit + \
                ')'
    if content_lower.startswith("!!/status"):
            now = datetime.utcnow()
            diff = now - UtcDate.startup_utc_date
            minutes, remainder = divmod(diff.seconds, 60)
            minutestr = "minutes" if minutes != 1 else "minute"
            return 'Running since {} UTC ({} {})'.format(GlobalVars.startup_utc, minutes, minutestr)
    if content_lower.startswith("!!/reboot"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            post_message_in_room(ev_room, "Goodbye, cruel world")
            os._exit(5)
    if content_lower.startswith("!!/stappit"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            post_message_in_room(ev_room, "Goodbye, cruel world")
            os._exit(6)
    if content_lower.startswith("!!/master"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            os._exit(8)
    if content_lower.startswith("!!/clearbl"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            if os.path.isfile("blacklistedUsers.txt"):
                os.remove("blacklistedUsers.txt")
                GlobalVars.blacklisted_users = []
                return "Kaboom, blacklisted users cleared."
            else:
                return "There are no blacklisted users at the moment."
    if content_lower.startswith("!!/block"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            timeToBlock = content_lower[9:].strip()
            timeToBlock = int(timeToBlock) if timeToBlock else 0
            if 0 < timeToBlock < 14400:
                GlobalVars.blockedTime = time.time() + timeToBlock
            else:
                GlobalVars.blockedTime = time.time() + 900
            GlobalVars.charcoal_hq.send_message("Reports blocked for {} seconds.".format(GlobalVars.blockedTime - time.time()))
            return "blocked"
    if content_lower.startswith("!!/unblock"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            GlobalVars.blockedTime = time.time()
            GlobalVars.charcoal_hq.send_message("Reports unblocked.")
            return "unblocked"
    if content_lower.startswith("!!/errorlogs"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            count = -1
            if len(message_parts) != 2:
                return "The !!/errorlogs command requires 1 argument."
            try:
                count = int(message_parts[1])
            except ValueError:
                pass
            if count == -1:
                return "Invalid argument."
            logs_part = fetch_lines_from_error_log(count)
            post_message_in_room(ev_room, logs_part, False)
    if content_lower.startswith("!!/pull"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            r = requests.get('https://api.github.com/repos/Charcoal-SE/SmokeDetector/git/refs/heads/master')
            latest_sha = r.json()["object"]["sha"]
            r = requests.get('https://api.github.com/repos/Charcoal-SE/SmokeDetector/commits/' + latest_sha + '/statuses')
            states = []
            for status in r.json():
                state = status["state"]
                states.append(state)
            if "success" in states:
                os._exit(3)
            elif "error" in states or "failure" in states:
                return "CI build failed! :( Please check your commit."
            elif "pending" in states or not states:
                return "CI build is still pending, wait until the build has finished and then pull again."
    if content_lower.startswith("!!/help") or content_lower.startswith("!!/info"):
        return "I'm [SmokeDetector](https://github.com/Charcoal-SE/SmokeDetector), a bot that detects spam and offensive posts on the network and posts alerts to chat. [A command list is available here](https://github.com/Charcoal-SE/SmokeDetector/wiki/Commands)."
    if content_lower.startswith("!!/apiquota"):
        return "The current API quota remaining is {}.".format(GlobalVars.apiquota)
    if content_lower.startswith("!!/whoami"):
        if (ev_room in GlobalVars.smokeDetector_user_id):
            return "My id for this room is {}.".format(GlobalVars.smokeDetector_user_id[ev_room])
        else:
            return "I don't know my user ID for this room. (Something is wrong, and it's apnorton's fault.)"
    if content_lower.startswith("!!/location"):
        return GlobalVars.location
    if content_lower.startswith("!!/queuestatus"):
        post_message_in_room(ev_room, GlobalVars.bodyfetcher.print_queue(), False)
    if content_lower.startswith("!!/blame"):
        GlobalVars.users_chatting[ev_room] = list(set(GlobalVars.users_chatting[ev_room]))  # Make unique
        user_to_blame = random.choice(GlobalVars.users_chatting[ev_room])
        return u"It's [{}]({})'s fault.".format(user_to_blame[0], user_to_blame[1])
    if "smokedetector" in content_lower and "fault" in content_lower and ("xkcdbot" in ev_user_name.lower() or "bjb568" in ev_user_name.lower()):
        return "Liar"
    if content_lower.startswith("!!/coffee"):
        return "*brews coffee for @" + ev_user_name.replace(" ", "") + "*"
    if content_lower.startswith("!!/tea"):
        return "*brews a cup of " + random.choice(['earl grey', 'green', 'chamomile', 'lemon', 'darjeeling', 'mint']) + " tea for @" + ev_user_name.replace(" ", "") + "*"
    if content_lower.startswith("!!/brownie"):
        return "Brown!"
    if content_lower.startswith("!!/hats"):
        wb_end = datetime(2016, 1, 4, 0, 0, 0)
        now = datetime.utcnow()
        if wb_end > now:
            diff = wb_end - now
            hours, remainder = divmod(diff.seconds, 3600)
            minutes, seconds = divmod(remainder, 60)
            daystr = "days" if diff.days != 1 else "day"
            hourstr = "hours" if hours != 1 else "hour"
            minutestr = "minutes" if minutes != 1 else "minute"
            secondstr = "seconds" if seconds != 1 else "second"
            return "HURRY UP AND EARN MORE HATS! Winterbash will be over in {} {}, {} {}, {} {}, and {} {}. :(".format(diff.days, daystr, hours, hourstr, minutes, minutestr, seconds, secondstr)
        else:
            return "Winterbash is over. :("
    if content_lower.startswith("!!/test"):
        string_to_test = content[8:]
        if len(string_to_test) == 0:
            return "Nothing to test"
        result = "> "
        reasons, why = FindSpam.test_post(string_to_test, string_to_test, string_to_test, "", False, False, 1, 0)
        if len(reasons) == 0:
            result += "Would not be caught for title, body, and username."
            return result
        result += ", ".join(reasons).capitalize()
        if why is not None and len(why) > 0:
            result += "\n----------\n"
            result += why
        return result
    if content_lower.startswith("!!/amiprivileged"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            return "Yes, you are a privileged user."
        else:
            return "No, you are not a privileged user."
    if content_lower.startswith("!!/notify"):
        if len(message_parts) != 3:
            return False, "2 arguments expected"
        user_id = int(ev_user_id)
        chat_site = wrap2.host
        room_id = message_parts[1]
        if not room_id.isdigit():
            return False, "Room ID is invalid."
        else:
            room_id = int(room_id)
        quiet_action = ("-" in message_parts[2])
        se_site = message_parts[2].replace('-', '')
        r, full_site = add_to_notification_list(user_id, chat_site, room_id, se_site)
        if r == 0:
            if not quiet_action:
                return "You'll now get pings from me if I report a post on `%s`, in room `%s` on `chat.%s`" % (full_site, room_id, chat_site)
            else:
                return None
        elif r == -1:
            return "That notification configuration is already registered."
        elif r == -2:
            return False, "The given SE site does not exist."
    if content_lower.startswith("!!/unnotify"):
        if len(message_parts) != 3:
            return False, "2 arguments expected"
        user_id = int(ev_user_id)
        chat_site = wrap2.host
        room_id = message_parts[1]
        if not room_id.isdigit():
            return False, "Room ID is invalid."
        else:
            room_id = int(room_id)
        quiet_action = ("-" in message_parts[2])
        se_site = message_parts[2].replace('-', '')
        r = remove_from_notification_list(user_id, chat_site, room_id, se_site)
        if r:
            if not quiet_action:
                return "I will no longer ping you if I report a post on `%s`, in room `%s` on `chat.%s`" % (se_site, room_id, chat_site)
            else:
                return None
        else:
            return "That configuration doesn't exist."
    if content_lower.startswith("!!/willibenotified"):
        if len(message_parts) != 3:
            return False, "2 arguments expected"
        user_id = int(ev_user_id)
        chat_site = wrap2.host
        room_id = message_parts[1]
        if not room_id.isdigit():
            return False, "Room ID is invalid"
        else:
            room_id = int(room_id)
        se_site = message_parts[2]
        will_be_notified = will_i_be_notified(user_id, chat_site, room_id, se_site)
        if will_be_notified:
            return "Yes, you will be notified for that site in that room."
        else:
            return "No, you won't be notified for that site in that room."
    if content_lower.startswith("!!/allnotificationsites"):
        if len(message_parts) != 2:
            return False, "1 argument expected"
        user_id = int(ev_user_id)
        chat_site = wrap2.host
        room_id = message_parts[1]
        if not room_id.isdigit():
            return False, "Room ID is invalid."
        sites = get_all_notification_sites(user_id, chat_site, room_id)
        if len(sites) == 0:
            return "You won't get notified for any sites in that room."
        else:
            return "You will get notified for these sites:\r\n" + ", ".join(sites)
    return False, None  # Unrecognized command, can be edited later.
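handle_commands mixes three return shapes: a reply string, None for quiet or unhandled cases, and a (False, message) tuple for validation errors (including the final return False, None for unrecognized commands). A minimal sketch of how a caller could normalize these before posting to chat; reply_for is a hypothetical helper, not the bot's actual dispatcher:

def reply_for(result):
    # Hypothetical normalizer for handle_commands() return values.
    if result is None:
        return None  # quiet action or nothing to say
    if isinstance(result, tuple):
        _ok, message = result  # validation errors come back as (False, message)
        return message if message else None
    return result  # plain reply string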
Example #7
    def make_api_call_for_site(self, site):
        posts = self.queue.pop(site)
        store_bodyfetcher_queue()

        if site == "stackoverflow.com":
            # Not all SO questions are shown in the realtime feed. We now
            # fetch all recently modified SO questions to work around that.
            min_query = ""
            if self.last_activity_date != 0:
                min_query = "&min=" + str(self.last_activity_date)
                pagesize = "50"
            else:
                pagesize = "25"
            url = "http://api.stackexchange.com/2.2/questions?site=stackoverflow&filter=!4y_-sca-)pfAwlmP_1FxC6e5yzutRIcQvonAiP&key=IAkbitmze4B8KpacUfLqkw((&pagesize=" + pagesize + min_query
        else:
            url = "http://api.stackexchange.com/2.2/questions/" + ";".join(str(x) for x in posts) + "?site=" + site + "&filter=!4y_-sca-)pfAwlmP_1FxC6e5yzutRIcQvonAiP&key=IAkbitmze4B8KpacUfLqkw(("
        # wait to make sure API has/updates post data
        time.sleep(60)
        try:
            response = requests.get(url, timeout=20).json()
        except requests.exceptions.Timeout:
            return  # could add some retrying logic here, but eh.

        add_or_update_api_data(site)

        if "quota_remaining" in response:
            if response["quota_remaining"] - GlobalVars.apiquota >= 1000 and GlobalVars.apiquota >= 0:
                GlobalVars.charcoal_hq.send_message("API quota rolled over with {} requests remaining.".format(GlobalVars.apiquota))
                sorted_calls_per_site = sorted(GlobalVars.api_calls_per_site.items(), key=itemgetter(1), reverse=True)
                api_quota_used_per_site = ""
                for site, quota_used in sorted_calls_per_site:
                    api_quota_used_per_site = api_quota_used_per_site + site.replace('.com', '').replace('.stackexchange', '') + ": " + str(quota_used) + "\n"
                api_quota_used_per_site = api_quota_used_per_site.strip()
                GlobalVars.charcoal_hq.send_message(api_quota_used_per_site, False)
                clear_api_data()

            elif response["quota_remaining"] == 0:
                GlobalVars.charcoal_hq.send_message("API reports no quota left!  May be a glitch.")
            GlobalVars.apiquota = response["quota_remaining"]
        else:
            GlobalVars.charcoal_hq.send_message("The quota_remaining property was not in the API response.")

        if site == "stackoverflow.com":
            if len(response["items"]) > 0 and "last_activity_date" in response["items"][0]:
                self.last_activity_date = response["items"][0]["last_activity_date"]

        for post in response["items"]:
            if "title" not in post or "body" not in post:
                continue
            title = GlobalVars.parser.unescape(post["title"])
            body = GlobalVars.parser.unescape(post["body"])
            link = post["link"]
            try:
                owner_name = GlobalVars.parser.unescape(post["owner"]["display_name"])
                owner_link = post["owner"]["link"]
                owner_rep = post["owner"]["reputation"]
            except:
                owner_name = ""
                owner_link = ""
                owner_rep = 0
            q_id = str(post["question_id"])

            is_spam, reason, why = check_if_spam(title, body, owner_name, owner_link, site, q_id, False, False, owner_rep)
            if is_spam:
                try:
                    handle_spam(title, body, owner_name, site, link, owner_link, q_id, reason, False, why)
                except:
                    print "NOP"
            try:
                for answer in post["answers"]:
                    answer_title = ""
                    body = answer["body"]
                    print "got answer from owner with name " + owner_name
                    link = answer["link"]
                    a_id = str(answer["answer_id"])
                    try:
                        owner_name = GlobalVars.parser.unescape(answer["owner"]["display_name"])
                        owner_link = answer["owner"]["link"]
                        owner_rep = answer["owner"]["reputation"]
                    except:
                        owner_name = ""
                        owner_link = ""
                        owner_rep = 0

                    is_spam, reason, why = check_if_spam(answer_title, body, owner_name, owner_link, site, a_id, True, False, owner_rep)
                    if is_spam:
                        try:
                            handle_spam(title, body, owner_name, site, link, owner_link, a_id, reason, True, why)
                        except:
                            print "NOP"
            except:
                print "no answers"
        return
Example #8
    def make_api_call_for_site(self, site):
        self.queue_modify_lock.acquire()
        if site not in self.queue:
            GlobalVars.charcoal_hq.send_message("Attempted API call to {} but there are no posts to fetch.".format(site))
            return
        posts = self.queue.pop(site)
        store_bodyfetcher_queue()
        self.queue_modify_lock.release()

        question_modifier = ""
        pagesize_modifier = ""

        if site == "stackoverflow.com":
            # Not all SO questions are shown in the realtime feed. We now
            # fetch all recently modified SO questions to work around that.
            if self.last_activity_date != 0:
                pagesize = "50"
            else:
                pagesize = "25"

            pagesize_modifier = "&pagesize={pagesize}&min={time_length}".format(pagesize=pagesize, time_length=str(self.last_activity_date))
        else:
            question_modifier = "/{0}".format(";".join(str(post) for post in posts))

        url = "http://api.stackexchange.com/2.2/questions{q_modifier}?site={site}&filter=!)E0g*ODaEZ(SgULQhYvCYbu09*ss(bKFdnTrGmGUxnqPptuHP&key=IAkbitmze4B8KpacUfLqkw(({optional_min_query_param}".format(q_modifier=question_modifier, site=site, optional_min_query_param=pagesize_modifier)

        # wait to make sure API has/updates post data
        time.sleep(3)
        # Respect backoff, if we were given one
        if GlobalVars.api_backoff_time > time.time():
            time.sleep(GlobalVars.api_backoff_time - time.time() + 2)
        try:
            response = requests.get(url, timeout=20).json()
        except requests.exceptions.Timeout:
            return  # could add some retrying logic here, but eh.

        self.api_data_lock.acquire()
        add_or_update_api_data(site)
        self.api_data_lock.release()

        message_hq = ""
        if "quota_remaining" in response:
            if response["quota_remaining"] - GlobalVars.apiquota >= 5000 and GlobalVars.apiquota >= 0:
                GlobalVars.charcoal_hq.send_message("API quota rolled over with {0} requests remaining. Current quota: {1}.".format(GlobalVars.apiquota, response["quota_remaining"]))
                sorted_calls_per_site = sorted(GlobalVars.api_calls_per_site.items(), key=itemgetter(1), reverse=True)
                api_quota_used_per_site = ""
                for site_name, quota_used in sorted_calls_per_site:
                    api_quota_used_per_site += site_name.replace('.com', '').replace('.stackexchange', '') + ": {0}\n".format(str(quota_used))
                api_quota_used_per_site = api_quota_used_per_site.strip()
                GlobalVars.charcoal_hq.send_message(api_quota_used_per_site, False)
                clear_api_data()
            if response["quota_remaining"] == 0:
                GlobalVars.charcoal_hq.send_message("API reports no quota left!  May be a glitch.")
                GlobalVars.charcoal_hq.send_message(str(response))  # No code format for now?
            if GlobalVars.apiquota == -1:
                GlobalVars.charcoal_hq.send_message("Restart: API quota is {quota}.".format(quota=response["quota_remaining"]))
            GlobalVars.apiquota = response["quota_remaining"]
        else:
            message_hq = "The quota_remaining property was not in the API response."

        if "error_message" in response:
            message_hq += " Error: {}.".format(response["error_message"])

        if "backoff" in response:
            if GlobalVars.api_backoff_time < time.time() + response["backoff"]:
                GlobalVars.api_backoff_time = time.time() + response["backoff"]
            match = regex.compile('/2.2/([^.]*)').search(url)
            url_part = match.group(1) if match else url
            message_hq += "\nBackoff received of {} seconds on request to `{}`".format(str(response["backoff"]), url_part)

        if len(message_hq) > 0:
            GlobalVars.charcoal_hq.send_message(message_hq.strip())

        if "items" not in response:
            return

        if site == "stackoverflow.com":
            items = response["items"]
            if len(items) > 0 and "last_activity_date" in items[0]:
                self.last_activity_date = items[0]["last_activity_date"]

        for post in response["items"]:
            if "title" not in post or "body" not in post:
                continue
            title = GlobalVars.parser.unescape(post["title"])
            body = GlobalVars.parser.unescape(post["body"])
            link = post["link"]
            post_score = post["score"]
            up_vote_count = post["up_vote_count"]
            down_vote_count = post["down_vote_count"]
            try:
                owner_name = GlobalVars.parser.unescape(post["owner"]["display_name"])
                owner_link = post["owner"]["link"]
                owner_rep = post["owner"]["reputation"]
            except:
                owner_name = ""
                owner_link = ""
                owner_rep = 0
            q_id = str(post["question_id"])

            is_spam, reason, why = check_if_spam(title, body, owner_name, owner_link, site, q_id, False, False, owner_rep, post_score)
            if is_spam:
                try:
                    handle_spam(title, body, owner_name, site, link, owner_link, q_id, reason, False, why, owner_rep, post_score, up_vote_count, down_vote_count, None)
                except:
                    print "NOP"
            try:
                for answer in post["answers"]:
                    answer_title = ""
                    body = answer["body"]
                    print "got answer from owner with name " + owner_name
                    link = answer["link"]
                    a_id = str(answer["answer_id"])
                    post_score = answer["score"]
                    up_vote_count = answer["up_vote_count"]
                    down_vote_count = answer["down_vote_count"]
                    try:
                        owner_name = GlobalVars.parser.unescape(answer["owner"]["display_name"])
                        owner_link = answer["owner"]["link"]
                        owner_rep = answer["owner"]["reputation"]
                    except:
                        owner_name = ""
                        owner_link = ""
                        owner_rep = 0

                    is_spam, reason, why = check_if_spam(answer_title, body, owner_name, owner_link, site, a_id, True, False, owner_rep, post_score)
                    if is_spam:
                        try:
                            handle_spam(title, body, owner_name, site, link, owner_link, a_id, reason, True, why, owner_rep, post_score, up_vote_count, down_vote_count, q_id)
                        except:
                            print "NOP"
            except:
                print "no answers"
        return
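This version of make_api_call_for_site respects the Stack Exchange API's backoff field: a backoff of N seconds is recorded in GlobalVars.api_backoff_time and the next request sleeps until that moment has passed, plus a 2-second margin. A standalone sketch of the same idea with illustrative module-level state; the names below are not SmokeDetector's:

import time

_backoff_until = 0.0  # illustrative shared state


def respect_backoff():
    # Sleep until any previously announced backoff window has passed.
    wait = _backoff_until - time.time()
    if wait > 0:
        time.sleep(wait + 2)  # same 2-second safety margin as the code above


def record_backoff(response):
    # Remember a `backoff` value (in seconds) from an API response dict.
    global _backoff_until
    if "backoff" in response:
        _backoff_until = max(_backoff_until, time.time() + response["backoff"])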
Example #9
    def make_api_call_for_site(self, site):
        posts = self.queue.pop(site)

        self.queue_store_lock.acquire()
        store_bodyfetcher_queue()
        self.queue_store_lock.release()

        if site == "stackoverflow.com":
            # Not all SO questions are shown in the realtime feed. We now
            # fetch all recently modified SO questions to work around that.
            min_query = ""
            if self.last_activity_date != 0:
                min_query = "&min=" + str(self.last_activity_date)
                pagesize = "50"
            else:
                pagesize = "25"
            url = "http://api.stackexchange.com/2.2/questions?site=stackoverflow&filter=!)E0g*ODaEZ(SgULQhYvCYbu09*ss(bKFdnTrGmGUxnqPptuHP&key=IAkbitmze4B8KpacUfLqkw((&pagesize=" + pagesize + min_query
        else:
            url = "http://api.stackexchange.com/2.2/questions/" + ";".join(
                str(x) for x in posts
            ) + "?site=" + site + "&filter=!)E0g*ODaEZ(SgULQhYvCYbu09*ss(bKFdnTrGmGUxnqPptuHP&key=IAkbitmze4B8KpacUfLqkw(("

        # wait to make sure API has/updates post data
        time.sleep(3)
        # Respect backoff, if we were given one
        if GlobalVars.api_backoff_time > time.time():
            time.sleep(GlobalVars.api_backoff_time - time.time() + 2)
        try:
            response = requests.get(url, timeout=20).json()
        except requests.exceptions.Timeout:
            return  # could add some retrying logic here, but eh.

        self.api_data_lock.acquire()
        add_or_update_api_data(site)
        self.api_data_lock.release()

        message_hq = ""
        if "quota_remaining" in response:
            if response[
                    "quota_remaining"] - GlobalVars.apiquota >= 1000 and GlobalVars.apiquota >= 0:
                GlobalVars.charcoal_hq.send_message(
                    "API quota rolled over with {} requests remaining.".format(
                        GlobalVars.apiquota))
                sorted_calls_per_site = sorted(
                    GlobalVars.api_calls_per_site.items(),
                    key=itemgetter(1),
                    reverse=True)
                api_quota_used_per_site = ""
                for site, quota_used in sorted_calls_per_site:
                    api_quota_used_per_site = api_quota_used_per_site + site.replace(
                        '.com', '').replace('.stackexchange',
                                            '') + ": " + str(quota_used) + "\n"
                api_quota_used_per_site = api_quota_used_per_site.strip()
                GlobalVars.charcoal_hq.send_message(api_quota_used_per_site,
                                                    False)
                clear_api_data()
            if response["quota_remaining"] == 0:
                GlobalVars.charcoal_hq.send_message(
                    "API reports no quota left!  May be a glitch.")
                GlobalVars.charcoal_hq.send_message(
                    str(response))  # No code format for now?
            if GlobalVars.apiquota == -1:
                GlobalVars.charcoal_hq.send_message(
                    "Restart: API quota is {}.".format(
                        response["quota_remaining"]))
            GlobalVars.apiquota = response["quota_remaining"]
        else:
            message_hq = "The quota_remaining property was not in the API response."

        if "error_message" in response:
            message_hq = message_hq + " Error: {}.".format(
                response["error_message"])

        if "backoff" in response:
            if GlobalVars.api_backoff_time < time.time() + response["backoff"]:
                GlobalVars.api_backoff_time = time.time() + response["backoff"]
            message_hq = message_hq + "\n" + "Backoff received of " + str(
                response["backoff"]) + " seconds."

        if len(message_hq) > 0:
            GlobalVars.charcoal_hq.send_message(message_hq.strip())

        if "items" not in response:
            return

        if site == "stackoverflow.com":
            if len(response["items"]
                   ) > 0 and "last_activity_date" in response["items"][0]:
                self.last_activity_date = response["items"][0][
                    "last_activity_date"]

        for post in response["items"]:
            if "title" not in post or "body" not in post:
                continue
            title = GlobalVars.parser.unescape(post["title"])
            body = GlobalVars.parser.unescape(post["body"])
            link = post["link"]
            post_score = post["score"]
            up_vote_count = post["up_vote_count"]
            down_vote_count = post["down_vote_count"]
            try:
                owner_name = GlobalVars.parser.unescape(
                    post["owner"]["display_name"])
                owner_link = post["owner"]["link"]
                owner_rep = post["owner"]["reputation"]
            except:
                owner_name = ""
                owner_link = ""
                owner_rep = 0
            q_id = str(post["question_id"])

            is_spam, reason, why = check_if_spam(title, body, owner_name,
                                                 owner_link, site, q_id, False,
                                                 False, owner_rep, post_score)
            if is_spam:
                try:
                    handle_spam(title, body, owner_name, site, link,
                                owner_link, q_id, reason, False, why,
                                owner_rep, post_score, up_vote_count,
                                down_vote_count)
                except:
                    print "NOP"
            try:
                for answer in post["answers"]:
                    answer_title = ""
                    body = answer["body"]
                    print "got answer from owner with name " + owner_name
                    link = answer["link"]
                    a_id = str(answer["answer_id"])
                    post_score = answer["score"]
                    up_vote_count = answer["up_vote_count"]
                    down_vote_count = answer["down_vote_count"]
                    try:
                        owner_name = GlobalVars.parser.unescape(
                            answer["owner"]["display_name"])
                        owner_link = answer["owner"]["link"]
                        owner_rep = answer["owner"]["reputation"]
                    except:
                        owner_name = ""
                        owner_link = ""
                        owner_rep = 0

                    is_spam, reason, why = check_if_spam(
                        answer_title, body, owner_name, owner_link, site, a_id,
                        True, False, owner_rep, post_score)
                    if is_spam:
                        try:
                            handle_spam(title, body, owner_name, site, link,
                                        owner_link, a_id, reason, True, why,
                                        owner_rep, post_score, up_vote_count,
                                        down_vote_count)
                        except:
                            print "NOP"
            except:
                print "no answers"
        return
Example #10
    def make_api_call_for_site(self, site):
        with self.queue_lock:
            new_posts = self.queue.pop(site, None)
        if new_posts is None:
            # site was not in the queue
            return
        Tasks.do(store_bodyfetcher_queue)

        new_post_ids = [int(k) for k in new_posts.keys()]

        if GlobalVars.flovis is not None:
            for post_id in new_post_ids:
                GlobalVars.flovis.stage('bodyfetcher/api_request', site,
                                        post_id, {
                                            'site': site,
                                            'posts': list(new_posts.keys())
                                        })

        # Add queue timing data
        pop_time = datetime.utcnow()
        post_add_times = [(pop_time - v).total_seconds()
                          for k, v in new_posts.items()]
        Tasks.do(add_queue_timing_data, site, post_add_times)

        store_max_ids = False
        with self.max_ids_modify_lock:
            if site in self.previous_max_ids and max(
                    new_post_ids) > self.previous_max_ids[site]:
                previous_max_id = self.previous_max_ids[site]
                intermediate_posts = range(previous_max_id + 1,
                                           max(new_post_ids))

                # We don't want to go over the 100-post API cutoff, so take the last
                # (100-len(new_post_ids)) from intermediate_posts

                intermediate_posts = intermediate_posts[-(100 -
                                                          len(new_post_ids)):]

                # new_post_ids could contain edited posts, so merge it back in
                combined = chain(intermediate_posts, new_post_ids)

                # Could be duplicates, so uniquify
                posts = list(set(combined))
            else:
                posts = new_post_ids

            new_post_ids_max = max(new_post_ids)
            if new_post_ids_max > self.previous_max_ids.get(site, 0):
                self.previous_max_ids[site] = new_post_ids_max
                store_max_ids = True

        if store_max_ids:
            schedule_store_bodyfetcher_max_ids()

        log('debug', "New IDs / Hybrid Intermediate IDs for {}:".format(site))
        if len(new_post_ids) > 30:
            log('debug', "{} +{} more".format(sorted(new_post_ids)[:30], len(new_post_ids) - 30))
        else:
            log('debug', sorted(new_post_ids))
        if len(new_post_ids) == len(posts):
            log('debug', "[ *Identical* ]")
        elif len(posts) > 30:
            log('debug', "{} +{} more".format(sorted(posts)[:30], len(posts) - 30))
        else:
            log('debug', sorted(posts))

        question_modifier = ""
        pagesize_modifier = {}

        if site == "stackoverflow.com":
            # Not all SO questions are shown in the realtime feed. We now
            # fetch all recently modified SO questions to work around that.
            with self.last_activity_date_lock:
                if self.last_activity_date != 0:
                    pagesize = "100"
                else:
                    pagesize = "50"

                pagesize_modifier = {
                    'pagesize': pagesize,
                    'min': str(self.last_activity_date -
                               self.ACTIVITY_DATE_EXTRA_EARLIER_MS_TO_FETCH)
                }
        else:
            question_modifier = "/{0}".format(";".join(
                [str(post) for post in posts]))

        url = "https://api.stackexchange.com/2.2/questions{}".format(
            question_modifier)
        params = {
            'filter':
            '!1rs)sUKylwB)8isvCRk.xNu71LnaxjnPS12*pX*CEOKbPFwVFdHNxiMa7GIVgzDAwMa',
            'key': 'IAkbitmze4B8KpacUfLqkw((',
            'site': site
        }
        params.update(pagesize_modifier)

        # wait to make sure API has/updates post data
        time.sleep(3)

        with GlobalVars.api_request_lock:
            # Respect backoff, if we were given one
            if GlobalVars.api_backoff_time > time.time():
                time.sleep(GlobalVars.api_backoff_time - time.time() + 2)
            try:
                time_request_made = datetime.utcnow().strftime('%H:%M:%S')
                response = requests.get(url, params=params, timeout=20).json()
            except (requests.exceptions.Timeout, requests.ConnectionError,
                    Exception):
                # Any failure in the request being made (timeout or otherwise) should be added back to
                # the queue.
                with self.queue_lock:
                    if site in self.queue:
                        self.queue[site].update(new_posts)
                    else:
                        self.queue[site] = new_posts
                return

            with self.api_data_lock:
                add_or_update_api_data(site)

            message_hq = ""
            with GlobalVars.apiquota_rw_lock:
                if "quota_remaining" in response:
                    quota_remaining = response["quota_remaining"]
                    if quota_remaining - GlobalVars.apiquota >= 5000 and GlobalVars.apiquota >= 0 \
                            and quota_remaining > 39980:
                        tell_rooms_with(
                            "debug",
                            "API quota rolled over with {0} requests remaining. "
                            "Current quota: {1}.".format(
                                GlobalVars.apiquota, quota_remaining))

                        sorted_calls_per_site = sorted(
                            GlobalVars.api_calls_per_site.items(),
                            key=itemgetter(1),
                            reverse=True)
                        api_quota_used_per_site = ""
                        for site_name, quota_used in sorted_calls_per_site:
                            sanatized_site_name = site_name.replace(
                                '.com', '').replace('.stackexchange', '')
                            api_quota_used_per_site += sanatized_site_name + ": {0}\n".format(
                                str(quota_used))
                        api_quota_used_per_site = api_quota_used_per_site.strip(
                        )

                        tell_rooms_with("debug", api_quota_used_per_site)
                        clear_api_data()
                    if quota_remaining == 0:
                        tell_rooms_with(
                            "debug",
                            "API reports no quota left!  May be a glitch.")
                        tell_rooms_with(
                            "debug", str(response))  # No code format for now?
                    if GlobalVars.apiquota == -1:
                        tell_rooms_with(
                            "debug", "Restart: API quota is {quota}.".format(
                                quota=quota_remaining))
                    GlobalVars.apiquota = quota_remaining
                else:
                    message_hq = "The quota_remaining property was not in the API response."

            if "error_message" in response:
                message_hq += " Error: {} at {} UTC.".format(
                    response["error_message"], time_request_made)
                if "error_id" in response and response["error_id"] == 502:
                    if GlobalVars.api_backoff_time < time.time() + 12:  # Add a backoff of 10 + 2 seconds as a default
                        GlobalVars.api_backoff_time = time.time() + 12
                message_hq += " Backing off on requests for the next 12 seconds."
                message_hq += " Previous URL: `{}`".format(url)

            if "backoff" in response:
                if GlobalVars.api_backoff_time < time.time() + response["backoff"]:
                    GlobalVars.api_backoff_time = time.time() + response["backoff"]

        if len(message_hq) > 0 and "site is required" not in message_hq:
            message_hq = message_hq.strip()
            if len(message_hq) > 500:
                message_hq = "\n" + message_hq
            tell_rooms_with("debug", message_hq)

        if "items" not in response:
            return

        if site == "stackoverflow.com":
            items = response["items"]
            if len(items) > 0 and "last_activity_date" in items[0]:
                with self.last_activity_date_lock:
                    self.last_activity_date = items[0]["last_activity_date"]

        num_scanned = 0
        start_time = time.time()

        for post in response["items"]:
            if GlobalVars.flovis is not None:
                pnb = copy.deepcopy(post)
                if 'body' in pnb:
                    pnb['body'] = 'Present, but truncated'
                if 'answers' in pnb:
                    del pnb['answers']

            if "title" not in post or "body" not in post:
                if GlobalVars.flovis is not None and 'question_id' in post:
                    GlobalVars.flovis.stage(
                        'bodyfetcher/api_response/no_content', site,
                        post['question_id'], pnb)
                continue

            post['site'] = site
            try:
                post['edited'] = (post['creation_date'] != post['last_edit_date'])
            except KeyError:
                post['edited'] = False  # last_edit_date not present = not edited

            question_doesnt_need_scan = is_post_recently_scanned_and_unchanged(post)
            add_recently_scanned_post(post)
            if not question_doesnt_need_scan:
                try:
                    post_ = Post(api_response=post)
                except PostParseError as err:
                    # Log the raw API dict; post_ is unassigned when parsing fails.
                    log('error', 'Error {0} when parsing post: {1!r}'.format(err, post))
                    if GlobalVars.flovis is not None and 'question_id' in post:
                        GlobalVars.flovis.stage(
                            'bodyfetcher/api_response/error', site,
                            post['question_id'], pnb)
                    continue

                num_scanned += 1

                is_spam, reason, why = check_if_spam(post_)

                if is_spam:
                    try:
                        if GlobalVars.flovis is not None and 'question_id' in post:
                            GlobalVars.flovis.stage(
                                'bodyfetcher/api_response/spam', site,
                                post['question_id'], {
                                    'post': pnb,
                                    'check_if_spam': [is_spam, reason, why]
                                })
                        handle_spam(post=post_, reasons=reason, why=why)
                    except Exception as e:
                        log('error', "Exception in handle_spam:", e)
                elif GlobalVars.flovis is not None and 'question_id' in post:
                    GlobalVars.flovis.stage(
                        'bodyfetcher/api_response/not_spam', site,
                        post['question_id'], {
                            'post': pnb,
                            'check_if_spam': [is_spam, reason, why]
                        })

            try:
                if "answers" not in post:
                    pass
                else:
                    for answer in post["answers"]:
                        if GlobalVars.flovis is not None:
                            anb = copy.deepcopy(answer)
                            if 'body' in anb:
                                anb['body'] = 'Present, but truncated'

                        num_scanned += 1
                        answer["IsAnswer"] = True  # Necesssary for Post object
                        answer[
                            "title"] = ""  # Necessary for proper Post object creation
                        answer[
                            "site"] = site  # Necessary for proper Post object creation
                        try:
                            answer['edited'] = (answer['creation_date'] !=
                                                answer['last_edit_date'])
                        except KeyError:
                            answer[
                                'edited'] = False  # last_edit_date not present = not edited
                        answer_doesnt_need_scan = is_post_recently_scanned_and_unchanged(
                            answer)
                        add_recently_scanned_post(answer)
                        if answer_doesnt_need_scan:
                            continue
                        answer_ = Post(api_response=answer, parent=post_)

                        is_spam, reason, why = check_if_spam(answer_)
                        if is_spam:
                            try:
                                if GlobalVars.flovis is not None and 'answer_id' in answer:
                                    GlobalVars.flovis.stage(
                                        'bodyfetcher/api_response/spam', site,
                                        answer['answer_id'], {
                                            'post': anb,
                                            'check_if_spam':
                                            [is_spam, reason, why]
                                        })
                                handle_spam(answer_, reasons=reason, why=why)
                            except Exception as e:
                                log('error', "Exception in handle_spam:", e)
                        elif GlobalVars.flovis is not None and 'answer_id' in answer:
                            GlobalVars.flovis.stage(
                                'bodyfetcher/api_response/not_spam', site,
                                answer['answer_id'], {
                                    'post': anb,
                                    'check_if_spam': [is_spam, reason, why]
                                })

            except Exception as e:
                log('error', "Exception handling answers:", e)

        end_time = time.time()
        scan_time = end_time - start_time
        GlobalVars.PostScanStat.add_stat(num_scanned, scan_time)
        return
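
The backoff handling above reduces to a small pattern: sleep out any active backoff before making the request, and push the deadline forward whenever the response carries a "backoff" field. A minimal standalone sketch under that assumption, with module-level state standing in for GlobalVars.api_backoff_time:

import time

import requests

API_BACKOFF_UNTIL = 0.0  # epoch seconds; stand-in for GlobalVars.api_backoff_time


def api_get(url, params):
    global API_BACKOFF_UNTIL
    # Respect any backoff we were previously given, plus a small cushion.
    now = time.time()
    if API_BACKOFF_UNTIL > now:
        time.sleep(API_BACKOFF_UNTIL - now + 2)
    response = requests.get(url, params=params, timeout=20).json()
    # The API may ask us to back off; keep the furthest deadline seen so far.
    if "backoff" in response:
        API_BACKOFF_UNTIL = max(API_BACKOFF_UNTIL, time.time() + response["backoff"])
    return response
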
    def make_api_call_for_site(self, site):
        if site not in self.queue:
            return

        self.queue_modify_lock.acquire()
        new_posts = self.queue.pop(site)
        store_bodyfetcher_queue()
        self.queue_modify_lock.release()

        new_post_ids = [int(k) for k, v in new_posts.items()]

        if GlobalVars.flovis is not None:
            for post_id in new_post_ids:
                GlobalVars.flovis.stage('bodyfetcher/api_request', site, post_id,
                                        {'queue':
                                         dict([[sk, [k for k, v in sq.items()]] for sk, sq in self.queue.items()]),
                                         'site': site, 'posts': [k for k, v in new_posts.items()]})

        self.queue_timing_modify_lock.acquire()
        post_add_times = [v for k, v in new_posts.items()]
        pop_time = datetime.utcnow()

        for add_time in post_add_times:
            try:
                seconds_in_queue = (pop_time - add_time).total_seconds()
                if site in self.queue_timings:
                    self.queue_timings[site].append(seconds_in_queue)
                else:
                    self.queue_timings[site] = [seconds_in_queue]
            except:
                continue  # Skip to next item if we've got invalid data or missing values.

        store_queue_timings()

        self.queue_timing_modify_lock.release()
        self.max_ids_modify_lock.acquire()

        if site in self.previous_max_ids and max(new_post_ids) > self.previous_max_ids[site]:
            previous_max_id = self.previous_max_ids[site]
            intermediate_posts = range(previous_max_id + 1, max(new_post_ids))

            # We don't want to go over the 100-post API cutoff, so take the last
            # (100-len(new_post_ids)) from intermediate_posts

            intermediate_posts = intermediate_posts[-(100 - len(new_post_ids)):]

            # new_post_ids could contain edited posts, so merge it back in
            combined = chain(intermediate_posts, new_post_ids)

            # Could be duplicates, so uniquify
            posts = list(set(combined))
        else:
            posts = new_post_ids

        try:
            if max(new_post_ids) > self.previous_max_ids[site]:
                self.previous_max_ids[site] = max(new_post_ids)
                store_bodyfetcher_max_ids()
        except KeyError:
            self.previous_max_ids[site] = max(new_post_ids)
            store_bodyfetcher_max_ids()

        self.max_ids_modify_lock.release()

        log('debug', "New IDs / Hybrid Intermediate IDs for {0}:".format(site))
        log('debug', sorted(new_post_ids))
        log('debug', sorted(posts))

        question_modifier = ""
        pagesize_modifier = ""

        if site == "stackoverflow.com":
            # Not all SO questions are shown in the realtime feed. We now
            # fetch all recently modified SO questions to work around that.
            if self.last_activity_date != 0:
                pagesize = "50"
            else:
                pagesize = "25"

            pagesize_modifier = "&pagesize={pagesize}" \
                                "&min={time_length}".format(pagesize=pagesize, time_length=str(self.last_activity_date))
        else:
            question_modifier = "/{0}".format(";".join(str(post) for post in posts))

        url = "https://api.stackexchange.com/2.2/questions{q_modifier}?site={site}" \
              "&filter=!)E0g*ODaEZ(SgULQhYvCYbu09*ss(bKFdnTrGmGUxnqPptuHP&key=IAkbitmze4B8KpacUfLqkw((" \
              "{optional_min_query_param}".format(q_modifier=question_modifier, site=site,
                                                  optional_min_query_param=pagesize_modifier)

        # wait to make sure API has/updates post data
        time.sleep(3)

        GlobalVars.api_request_lock.acquire()
        # Respect backoff, if we were given one
        if GlobalVars.api_backoff_time > time.time():
            time.sleep(GlobalVars.api_backoff_time - time.time() + 2)
        try:
            time_request_made = datetime.utcnow().strftime('%H:%M:%S')
            response = requests.get(url, timeout=20).json()
        except (requests.exceptions.Timeout, requests.ConnectionError, Exception):
            # Any failure in the request being made (timeout or otherwise) should be added back to
            # the queue.
            self.queue_modify_lock.acquire()
            if site in self.queue:
                self.queue[site].update(new_posts)
            else:
                self.queue[site] = new_posts
            self.queue_modify_lock.release()
            GlobalVars.api_request_lock.release()
            return

        self.api_data_lock.acquire()
        add_or_update_api_data(site)
        self.api_data_lock.release()

        message_hq = ""
        if "quota_remaining" in response:
            if response["quota_remaining"] - GlobalVars.apiquota >= 5000 and GlobalVars.apiquota >= 0:
                tell_rooms_with("debug", "API quota rolled over with {0} requests remaining. "
                                         "Current quota: {1}.".format(GlobalVars.apiquota,
                                                                      response["quota_remaining"]))

                sorted_calls_per_site = sorted(GlobalVars.api_calls_per_site.items(), key=itemgetter(1), reverse=True)
                api_quota_used_per_site = ""
                for site_name, quota_used in sorted_calls_per_site:
                    sanitized_site_name = site_name.replace('.com', '').replace('.stackexchange', '')
                    api_quota_used_per_site += sanitized_site_name + ": {0}\n".format(str(quota_used))
                api_quota_used_per_site = api_quota_used_per_site.strip()

                tell_rooms_with("debug", api_quota_used_per_site)
                clear_api_data()
            if response["quota_remaining"] == 0:
                tell_rooms_with("debug", "API reports no quota left!  May be a glitch.")
                tell_rooms_with("debug", str(response))  # No code format for now?
            if GlobalVars.apiquota == -1:
                tell_rooms_with("debug", "Restart: API quota is {quota}."
                                         .format(quota=response["quota_remaining"]))
            GlobalVars.apiquota = response["quota_remaining"]
        else:
            message_hq = "The quota_remaining property was not in the API response."

        if "error_message" in response:
            message_hq += " Error: {} at {} UTC.".format(response["error_message"], time_request_made)
            if "error_id" in response and response["error_id"] == 502:
                if GlobalVars.api_backoff_time < time.time() + 12:  # Add a backoff of 10 + 2 seconds as a default
                    GlobalVars.api_backoff_time = time.time() + 12
            message_hq += " Backing off on requests for the next 12 seconds."
            message_hq += " Previous URL: `{}`".format(url)

        if "backoff" in response:
            if GlobalVars.api_backoff_time < time.time() + response["backoff"]:
                GlobalVars.api_backoff_time = time.time() + response["backoff"]

        GlobalVars.api_request_lock.release()

        if len(message_hq) > 0:
            tell_rooms_with("debug", message_hq.strip())

        if "items" not in response:
            return

        if site == "stackoverflow.com":
            items = response["items"]
            if len(items) > 0 and "last_activity_date" in items[0]:
                self.last_activity_date = items[0]["last_activity_date"]

        num_scanned = 0
        start_time = time.time()

        for post in response["items"]:
            if "title" not in post or "body" not in post:
                if GlobalVars.flovis is not None and 'question_id' in post:
                    GlobalVars.flovis.stage('bodyfetcher/api_response/no_content', site, post['question_id'], post)
                continue

            post['site'] = site
            try:
                post_ = Post(api_response=post)
            except PostParseError as err:
                # post_ is unassigned if parsing failed, so log the raw API dict.
                log('error', 'Error {0} when parsing post: {1!r}'.format(err, post))
                if GlobalVars.flovis is not None and 'question_id' in post:
                    GlobalVars.flovis.stage('bodyfetcher/api_response/error', site, post['question_id'], post)
                continue

            num_scanned += 1

            is_spam, reason, why = check_if_spam(post_)

            if is_spam:
                try:
                    if GlobalVars.flovis is not None and 'question_id' in post:
                        GlobalVars.flovis.stage('bodyfetcher/api_response/spam', site, post['question_id'],
                                                {'post': post, 'check_if_spam': [is_spam, reason, why]})
                    handle_spam(post=post_,
                                reasons=reason,
                                why=why)
                except Exception as e:
                    log('error', "Exception in handle_spam:", e)
            elif GlobalVars.flovis is not None and 'question_id' in post:
                GlobalVars.flovis.stage('bodyfetcher/api_response/not_spam', site, post['question_id'],
                                        {'post': post, 'check_if_spam': [is_spam, reason, why]})

            try:
                if "answers" not in post:
                    pass
                else:
                    for answer in post["answers"]:
                        num_scanned += 1
                        answer["IsAnswer"] = True  # Necesssary for Post object
                        answer["title"] = ""  # Necessary for proper Post object creation
                        answer["site"] = site  # Necessary for proper Post object creation
                        answer_ = Post(api_response=answer, parent=post_)

                        is_spam, reason, why = check_if_spam(answer_)
                        if is_spam:
                            try:
                                if GlobalVars.flovis is not None and 'answer_id' in answer:
                                    GlobalVars.flovis.stage('bodyfetcher/api_response/spam', site, answer['answer_id'],
                                                            {'post': answer, 'check_if_spam': [is_spam, reason, why]})
                                handle_spam(answer_,
                                            reasons=reason,
                                            why=why)
                            except Exception as e:
                                log('error', "Exception in handle_spam:", e)
                        elif GlobalVars.flovis is not None and 'answer_id' in answer:
                            GlobalVars.flovis.stage('bodyfetcher/api_response/not_spam', site, answer['answer_id'],
                                                    {'post': answer, 'check_if_spam': [is_spam, reason, why]})

            except Exception as e:
                log('error', "Exception handling answers:", e)

        end_time = time.time()
        GlobalVars.posts_scan_stats_lock.acquire()
        GlobalVars.num_posts_scanned += num_scanned
        GlobalVars.post_scan_time += end_time - start_time
        GlobalVars.posts_scan_stats_lock.release()
        return
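
Both versions of make_api_call_for_site build the fetch list the same way: fill the gap between the previously seen maximum ID and the newest ID, keep only as many gap IDs as still fit under the API's 100-posts-per-request cutoff (hence the negative slice), then merge with the new IDs and de-duplicate. A minimal sketch of that step as a hypothetical helper:

from itertools import chain


def ids_to_fetch(new_post_ids, previous_max_id, cutoff=100):
    # IDs that appeared between the last seen maximum and the newest post.
    gap = list(range(previous_max_id + 1, max(new_post_ids)))
    # Keep only the most recent gap IDs so the combined request stays under the cutoff.
    room = cutoff - len(new_post_ids)
    intermediate = gap[-room:] if room > 0 else []
    # new_post_ids may contain edited (older) posts, so merge and de-duplicate.
    return sorted(set(chain(intermediate, new_post_ids)))


# ids_to_fetch([101, 105], previous_max_id=100) -> [101, 102, 103, 104, 105]
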
def handle_commands(content_lower, message_parts, ev_room, ev_user_id, ev_user_name, wrap2, content, message_id):
    message_url = "//chat." + wrap2.host + "/transcript/message/" + str(message_id)
    second_part_lower = "" if len(message_parts) < 2 else message_parts[1].lower()
    if re.compile("^:[0-9]+$").search(message_parts[0]):
        msg_id = int(message_parts[0][1:])
        msg = wrap2.get_message(msg_id)
        msg_content = msg.content_source
        quiet_action = ("-" in message_parts[1].lower())
        if str(msg.owner.id) != GlobalVars.smokeDetector_user_id[ev_room] or msg_content is None:
            return
        post_url = fetch_post_url_from_msg_content(msg_content)
        post_site_id = fetch_post_id_and_site_from_msg_content(msg_content)
        if post_site_id is not None:
            post_type = post_site_id[2]
        else:
            post_type = None
        if (second_part_lower.startswith("false") or second_part_lower.startswith("fp")) \
                and is_privileged(ev_room, ev_user_id, wrap2):
            if post_site_id is None:
                return "That message is not a report."

            t_metasmoke = Thread(target=Metasmoke.send_feedback_for_post,
                                 args=(post_url, second_part_lower, ev_user_name, ))
            t_metasmoke.start()

            add_false_positive((post_site_id[0], post_site_id[1]))
            user_added = False
            if message_parts[1].lower().startswith("falseu") or message_parts[1].lower().startswith("fpu"):
                url_from_msg = fetch_owner_url_from_msg_content(msg_content)
                if url_from_msg is not None:
                    user = get_user_from_url(url_from_msg)
                    if user is not None:
                        add_whitelisted_user(user)
                        user_added = True
            learned = False
            if post_type == "question":
                learned = bayesian_learn_title(fetch_title_from_msg_content(msg_content), "good")
                if learned and user_added and not quiet_action:
                    return "Registered question as false positive, whitelisted user and added title to Bayesian doctype 'good'."
                elif learned and not quiet_action:
                    return "Registered question as false positive and added title to Bayesian doctype 'good'."
                elif not learned:
                    return "Registered question as false positive, but could not add title to Bayesian doctype 'good'."
            elif post_type == "answer":
                if user_added and not quiet_action:
                    return "Registered answer as false positive and whitelisted user."
                elif not quiet_action:
                    return "Registered answer as false positive."
            try:
                msg.delete()
            except:
                pass
        if (second_part_lower.startswith("true") or second_part_lower.startswith("tp")) \
                and is_privileged(ev_room, ev_user_id, wrap2):
            if post_site_id is None:
                return "That message is not a report."

            t_metasmoke = Thread(target=Metasmoke.send_feedback_for_post,
                                 args=(post_url, second_part_lower, ev_user_name, ))
            t_metasmoke.start()

            learned = False
            user_added = False
            if message_parts[1].lower().startswith("trueu") or message_parts[1].lower().startswith("tpu"):
                url_from_msg = fetch_owner_url_from_msg_content(msg_content)
                if url_from_msg is not None:
                    user = get_user_from_url(url_from_msg)
                    if user is not None:
                        add_blacklisted_user(user, message_url, post_url)
                        user_added = True
            if post_type == "question":
                learned = bayesian_learn_title(fetch_title_from_msg_content(msg_content), "bad")
                if learned and user_added and not quiet_action:
                    return "Blacklisted user and registered question as true positive: added title to the Bayesian doctype 'bad'."
                elif learned and not quiet_action:
                    return "Registered question as true positive: added title to the Bayesian doctype 'bad'."
                elif not learned:
                    return "Something went wrong when registering question as true positive."
            elif post_type == "answer":
                if user_added and not quiet_action:
                    return "Blacklisted user."
                elif not user_added:
                    return "`true`/`tp` cannot be used for answers because their job is to add the title of the *question* to the Bayesian doctype 'bad'. If you want to blacklist the poster of the answer, use `trueu` or `tpu`."
        if second_part_lower.startswith("ignore") and is_privileged(ev_room, ev_user_id, wrap2):
            if post_site_id is None:
                return "That message is not a report."
            add_ignored_post(post_site_id[0:2])
            if not quiet_action:
                return "Post ignored; alerts about it will no longer be posted."
        if (second_part_lower.startswith("delete") or second_part_lower.startswith("remove") or second_part_lower.startswith("gone") or second_part_lower.startswith("poof")
                or second_part_lower == "del") and is_privileged(ev_room, ev_user_id, wrap2):
            try:
                msg.delete()
            except:
                pass  # couldn't delete message
        if second_part_lower.startswith("why"):
            t = fetch_post_id_and_site_from_msg_content(msg_content)
            if t is None:
                return "That's not a report."
            post_id, site, _ = t
            why = get_why(site, post_id)
            if why is None or why == "":
                return "There is no `why` data for that post (anymore)."
            else:
                return why
    if content_lower.startswith("!!/addblu") \
            and is_privileged(ev_room, ev_user_id, wrap2):
        uid, val = get_user_from_list_command(content_lower)
        if uid > -1 and val != "":
            add_blacklisted_user((uid, val), message_url, "")
            return "User blacklisted (`{}` on `{}`).".format(uid, val)
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return "Invalid format. Valid format: `!!/addblu profileurl` *or* `!!/addblu userid sitename`."
    if content_lower.startswith("!!/rmblu") \
            and is_privileged(ev_room, ev_user_id, wrap2):
        uid, val = get_user_from_list_command(content_lower)
        if uid > -1 and val != "":
            if remove_blacklisted_user((uid, val)):
                return "User removed from blacklist (`{}` on `{}`).".format(uid, val)
            else:
                return "User is not blacklisted."
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return "Invalid format. Valid format: `!!/rmblu profileurl` *or* `!!/rmblu userid sitename`."
    if content_lower.startswith("!!/isblu"):
        uid, val = get_user_from_list_command(content_lower)
        if uid > -1 and val != "":
            if is_blacklisted_user((uid, val)):
                return "User is blacklisted. (`{}` on `{}`).".format(uid, val)
            else:
                return "User is not blacklisted. (`{}` on `{}`).".format(uid, val)
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return "Invalid format. Valid format: `!!/isblu profileurl` *or* `!!/isblu userid sitename`."
    if content_lower.startswith("!!/addwlu") \
            and is_privileged(ev_room, ev_user_id, wrap2):
        uid, val = get_user_from_list_command(content_lower)
        if uid > -1 and val != "":
            add_whitelisted_user((uid, val))
            return "User whitelisted (`{}` on `{}`).".format(uid, val)
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return "Invalid format. Valid format: `!!/addwlu profileurl` *or* `!!/addwlu userid sitename`."
    if content_lower.startswith("!!/rmwlu") \
            and is_privileged(ev_room, ev_user_id, wrap2):
        uid, val = get_user_from_list_command(content_lower)
        if uid != -1 and val != "":
            if remove_whitelisted_user((uid, val)):
                return "User removed from whitelist (`{}` on `{}`).".format(uid, val)
            else:
                return "User is not whitelisted."
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return "Invalid format. Valid format: `!!/rmwlu profileurl` *or* `!!/rmwlu userid sitename`."
    if content_lower.startswith("!!/iswlu"):
        uid, val = get_user_from_list_command(content_lower)
        if uid > -1 and val != "":
            if is_whitelisted_user((uid, val)):
                return "User is whitelisted. (`{}` on `{}`).".format(uid, val)
            else:
                return "User is not whitelisted. (`{}` on `{}`).".format(uid, val)
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return "Invalid format. Valid format: `!!/iswlu profileurl` *or* `!!/iswlu userid sitename`."
    if content_lower.startswith("!!/report") \
            and is_privileged(ev_room, ev_user_id, wrap2):
        if len(message_parts) < 2:
            return "Not enough arguments."
        url = message_parts[1]
        post_data = api_get_post(url)
        if post_data is None:
            return "That does not look like a valid post URL."
        if post_data is False:
            return "Could not find data for this post in the API. Check whether the post is not deleted yet."
        user = get_user_from_url(post_data.owner_url)
        if user is not None:
            add_blacklisted_user(user, message_url, post_data.post_url)
        bayesian_learn_title(post_data.title, "bad")
        handle_spam(post_data.title, post_data.body, post_data.owner_name, post_data.site, post_data.post_url,
                    post_data.owner_url, post_data.post_id, ["Manually reported " + post_data.post_type],
                    post_data.post_type == "answer")
    if content_lower.startswith("!!/wut"):
        return "Whaddya mean, 'wut'? Humans..."
    if content_lower.startswith("!!/lick"):
        return "*licks ice cream cone*"
    if content_lower.startswith("!!/alive"):
        if ev_room == GlobalVars.charcoal_room_id:
            return 'Of course'
        elif ev_room == GlobalVars.meta_tavern_room_id or ev_room == GlobalVars.socvr_room_id:
            return random.choice(['Yup', 'You doubt me?', 'Of course', '... did I miss something?',
                                  'plz send teh coffee',
                                  'Watching this endless list of new questions *never* gets boring',
                                  'Kinda sorta'])
    if content_lower.startswith("!!/rev"):
        return '[{}](https://github.com/Charcoal-SE/SmokeDetector/commit/{})'.format(
            GlobalVars.commit_with_author, GlobalVars.commit)
    if content_lower.startswith("!!/status"):
        now = datetime.utcnow()
        diff = now - UtcDate.startup_utc_date
        minutes, remainder = divmod(diff.seconds, 60)
        minutestr = "minutes" if minutes != 1 else "minute"
        return 'Running since {} UTC ({} {})'.format(GlobalVars.startup_utc, minutes, minutestr)
    if content_lower.startswith("!!/reboot"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            post_message_in_room(ev_room, "Goodbye, cruel world")
            os._exit(5)
    if content_lower.startswith("!!/stappit"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            post_message_in_room(ev_room, "Goodbye, cruel world")
            os._exit(6)
    if content_lower.startswith("!!/master"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            os._exit(8)
    if content_lower.startswith("!!/clearbl"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            if os.path.isfile("blacklistedUsers.txt"):
                os.remove("blacklistedUsers.txt")
                GlobalVars.blacklisted_users = []
                return "Kaboom, blacklisted users cleared."
            else:
                return "There are no blacklisted users at the moment."
    if content_lower.startswith("!!/block"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            timeToBlock = content_lower[9:].strip()
            timeToBlock = int(timeToBlock) if timeToBlock else 0
            if 0 < timeToBlock < 14400:
                GlobalVars.blockedTime = time.time() + timeToBlock
            else:
                GlobalVars.blockedTime = time.time() + 900
            return "blocked"
    if content_lower.startswith("!!/unblock"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            GlobalVars.blockedTime = time.time()
            return "unblocked"
    if content_lower.startswith("!!/errorlogs"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            count = -1
            if len(message_parts) != 2:
                return "The !!/errorlogs command requires 1 argument."
            try:
                count = int(message_parts[1])
            except ValueError:
                pass
            if count == -1:
                return "Invalid argument."
            logs_part = fetch_lines_from_error_log(count)
            post_message_in_room(ev_room, logs_part, False)
    if content_lower.startswith("!!/pull"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            r = requests.get('https://api.github.com/repos/Charcoal-SE/SmokeDetector/git/refs/heads/master')
            latest_sha = r.json()["object"]["sha"]
            r = requests.get('https://api.github.com/repos/Charcoal-SE/SmokeDetector/commits/' + latest_sha + '/statuses')
            states = []
            for status in r.json():
                state = status["state"]
                states.append(state)
            if "success" in states:
                os._exit(3)
            elif "error" in states or "failure" in states:
                return "CI build failed! :( Please check your commit."
            elif "pending" in states or not states:
                return "CI build is still pending, wait until the build has finished and then pull again."
    if content_lower.startswith("!!/help"):
        return "I'm [SmokeDetector](https://github.com/Charcoal-SE/SmokeDetector), a bot that detects spam and low-quality posts on the network and posts alerts to chat. [A command list is available here](https://github.com/Charcoal-SE/SmokeDetector/wiki/Commands)."
    if content_lower.startswith("!!/apiquota"):
        return GlobalVars.apiquota
    if content_lower.startswith("!!/whoami"):
        if (ev_room in GlobalVars.smokeDetector_user_id):
            return "My id for this room is {}".format(GlobalVars.smokeDetector_user_id[ev_room])
        else:
            return "I don't know my user ID for this room. (Something is wrong, and it's apnorton's fault.)"
    if content_lower.startswith("!!/location"):
        return GlobalVars.location
    if content_lower.startswith("!!/queuestatus"):
        post_message_in_room(ev_room, GlobalVars.bodyfetcher.print_queue(), False)
    if content_lower.startswith("!!/blame") and (ev_room == GlobalVars.meta_tavern_room_id or ev_room == GlobalVars.socvr_room_id):
        GlobalVars.tavern_users_chatting = list(set(GlobalVars.tavern_users_chatting))  # Make unique
        user_to_blame = random.choice(GlobalVars.tavern_users_chatting)
        return "It's " + user_to_blame + "'s fault."
    if "smokedetector" in content_lower and "fault" in content_lower and ("xkcdbot" in ev_user_name.lower() or "bjb568" in ev_user_name.lower()):
        return "Liar"
    if content_lower.startswith("!!/coffee"):
        return "*brews coffee for @" + ev_user_name.replace(" ", "") + "*"
    if content_lower.startswith("!!/tea"):
        return "*brews a cup of " + random.choice(['earl grey', 'green', 'chamomile', 'lemon', 'darjeeling', 'mint']) + " tea for @" + ev_user_name.replace(" ", "") + "*"
    if content_lower.startswith("!!/brownie"):
        return "Brown!"
    if content_lower.startswith("!!/test"):
        string_to_test = content[8:]
        if len(string_to_test) == 0:
            return "Nothing to test"
        result = "> "
        reasons, why = FindSpam.test_post(string_to_test, string_to_test, string_to_test, "", False, False)
        if len(reasons) == 0:
            result += "Would not be caught for title, body and username."
            return result
        result += ", ".join(reasons).capitalize()
        if why is not None and len(why) > 0:
            result += "\n----------\n"
            result += why
        return result

    return None
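
The reply-based feedback commands above follow one convention: the first token :<message id> selects the report, the second token is the verb (fp/false, tp/true, ignore, ...), a trailing "u" also acts on the user, and a "-" in the verb suppresses the confirmation message. A minimal sketch of just that parsing step (hypothetical helper, not the bot's own API):

import re

REPLY_RE = re.compile(r"^:([0-9]+)$")


def parse_feedback(message_parts):
    """Return (message_id, action, act_on_user, quiet) or None if not feedback."""
    if len(message_parts) < 2 or REPLY_RE.match(message_parts[0]) is None:
        return None
    msg_id = int(message_parts[0][1:])
    verb = message_parts[1].lower()
    quiet = "-" in verb                           # e.g. "fp-" means no confirmation reply
    act_on_user = verb.rstrip("-").endswith("u")  # "fpu", "tpu", "falseu", "trueu"
    if verb.startswith(("false", "fp")):
        action = "fp"
    elif verb.startswith(("true", "tp")):
        action = "tp"
    elif verb.startswith("ignore"):
        action = "ignore"
    else:
        action = verb
    return msg_id, action, act_on_user, quiet
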
Exemple #13
0
    def handle_websocket_data(data):
        if "message" not in data:
            return

        message = data['message']
        if isinstance(message, Iterable):
            if "message" in message:
                chatcommunicate.tell_rooms_with("metasmoke", message['message'])
            elif "autoflag_fp" in message:
                event = message["autoflag_fp"]

                chatcommunicate.tell_rooms(event["message"], ("debug", "site-" + event["site"]),
                                           ("no-site-" + event["site"],), notify_site="/autoflag_fp")
            elif "exit" in message:
                os._exit(message["exit"])
            elif "blacklist" in message:
                ids = (message['blacklist']['uid'], message['blacklist']['site'])

                datahandling.add_blacklisted_user(ids, "metasmoke", message['blacklist']['post'])
                datahandling.last_feedbacked = (ids, time.time() + 60)
            elif "unblacklist" in message:
                ids = (message['unblacklist']['uid'], message['unblacklist']['site'])
                datahandling.remove_blacklisted_user(ids)
            elif "naa" in message:
                post_site_id = parsing.fetch_post_id_and_site_from_url(message["naa"]["post_link"])
                datahandling.add_ignored_post(post_site_id[0:2])
            elif "fp" in message:
                post_site_id = parsing.fetch_post_id_and_site_from_url(message["fp"]["post_link"])
                datahandling.add_false_positive(post_site_id[0:2])
            elif "report" in message:
                post_data = apigetpost.api_get_post(message["report"]["post_link"])
                if post_data is None or post_data is False:
                    return
                if datahandling.has_already_been_posted(post_data.site, post_data.post_id, post_data.title) \
                        and not datahandling.is_false_positive((post_data.post_id, post_data.site)):
                    return
                user = parsing.get_user_from_url(post_data.owner_url)
                post = classes.Post(api_response=post_data.as_dict)

                scan_spam, scan_reasons, scan_why = spamhandling.check_if_spam(post)
                if scan_spam:
                    why_append = u"This post would have also been caught for: " + \
                        u", ".join(scan_reasons).capitalize() + "\n" + scan_why
                else:
                    why_append = u"This post would not have been caught otherwise."

                # Add user to blacklist *after* post is scanned
                if user is not None:
                    datahandling.add_blacklisted_user(user, "metasmoke", post_data.post_url)

                why = u"Post manually reported by user *{}* from metasmoke.\n\n{}".format(
                    message["report"]["user"], why_append)

                spamhandling.handle_spam(post=post,
                                         reasons=["Manually reported " + post_data.post_type],
                                         why=why)
            elif "deploy_updated" in message:
                sha = message["deploy_updated"]["head_commit"]["id"]
                if sha != os.popen('git log -1 --pretty="%H"').read().strip():  # read() ends with a newline
                    if "autopull" in message["deploy_updated"]["head_commit"]["message"]:
                        if only_blacklists_changed(GitManager.get_remote_diff()):
                            commit_md = "[`{0}`](https://github.com/Charcoal-SE/SmokeDetector/commit/{0})" \
                                        .format(sha[:7])
                            i = []  # Currently no issues with blacklists
                            for bl_file in glob('bad_*.txt') + glob('blacklisted_*.txt'):  # Check blacklists for issues
                                with open(bl_file, 'r') as lines:
                                    seen = dict()
                                    for lineno, line in enumerate(lines, 1):
                                        if line.endswith('\r\n'):
                                            i.append("DOS line ending at `{0}:{1}` in {2}".format(bl_file, lineno,
                                                                                                  commit_md))
                                        if not line.endswith('\n'):
                                            i.append("No newline at end of `{0}` in {1}".format(bl_file, commit_md))
                                        if line == '\n':
                                            i.append("Blank line at `{0}:{1}` in {2}".format(bl_file, lineno,
                                                                                             commit_md))
                                        if line in seen:
                                            i.append("Duplicate entry of {0} at lines {1} and {2} of {3} in {4}"
                                                     .format(line.rstrip('\n'), seen[line], lineno, bl_file, commit_md))
                                        seen[line] = lineno
                            if i == []:  # No issues
                                GitManager.pull_remote()
                                load_blacklists()
                                chatcommunicate.tell_rooms_with("debug", "No code modified in {0}, only blacklists"
                                                                " reloaded.".format(commit_md))
                            else:
                                i.append("please fix before pulling.")
                                chatcommunicate.tell_rooms_with("debug", ", ".join(i))
            elif "commit_status" in message:
                c = message["commit_status"]
                sha = c["commit_sha"][:7]
                if c["commit_sha"] != os.popen('git log -1 --pretty="%H"').read():
                    if c["status"] == "success":
                        if "autopull" in c["commit_message"]:
                            s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/Charcoal-SE/SmokeDetector/" \
                                "commit/{commit_sha})"\
                                " succeeded. Message contains 'autopull', pulling...".format(ci_link=c["ci_url"],
                                                                                             commit_sha=sha)
                            chatcommunicate.tell_rooms_with("debug", s, notify_site="/ci")
                            time.sleep(2)
                            os._exit(3)
                        else:
                            s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/Charcoal-SE/SmokeDetector/" \
                                "commit/{commit_sha}) succeeded.".format(ci_link=c["ci_url"], commit_sha=sha)

                            chatcommunicate.tell_rooms_with("debug", s, notify_site="/ci")
                    elif c["status"] == "failure":
                        s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/Charcoal-SE/SmokeDetector/" \
                            "commit/{commit_sha}) failed.".format(ci_link=c["ci_url"], commit_sha=sha)

                        chatcommunicate.tell_rooms_with("debug", s, notify_site="/ci")
            elif "everything_is_broken" in message:
                if message["everything_is_broken"] is True:
                    os._exit(6)
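
The inline lint pass above checks each bad_*.txt / blacklisted_*.txt file for DOS line endings, a missing final newline, blank lines, and duplicate entries before pulling. The same checks as a standalone sketch (assumed helper; it reports file paths rather than commit links):

def lint_blacklist_file(path):
    """Return a list of human-readable issues found in one blacklist file."""
    issues = []
    seen = {}
    with open(path, "r") as lines:
        for lineno, line in enumerate(lines, 1):
            if line.endswith("\r\n"):
                issues.append("DOS line ending at {0}:{1}".format(path, lineno))
            if not line.endswith("\n"):
                issues.append("No newline at end of {0}".format(path))
            if line == "\n":
                issues.append("Blank line at {0}:{1}".format(path, lineno))
            if line in seen:
                issues.append("Duplicate entry of {0} at lines {1} and {2} of {3}".format(
                    line.rstrip("\n"), seen[line], lineno, path))
            seen[line] = lineno
    return issues
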
    def handle_websocket_data(data):
        if "message" not in data:
            return

        message = data['message']
        if isinstance(message, Iterable):
            if "message" in message:
                chatcommunicate.tell_rooms_with("debug", message['message'])
            elif "exit" in message:
                os._exit(message["exit"])
            elif "blacklist" in message:
                datahandling.add_blacklisted_user((message['blacklist']['uid'], message['blacklist']['site']),
                                                  "metasmoke", message['blacklist']['post'])
            elif "naa" in message:
                post_site_id = parsing.fetch_post_id_and_site_from_url(message["naa"]["post_link"])
                datahandling.add_ignored_post(post_site_id[0:2])
            elif "fp" in message:
                post_site_id = parsing.fetch_post_id_and_site_from_url(message["fp"]["post_link"])
                datahandling.add_false_positive(post_site_id[0:2])
            elif "report" in message:
                post_data = apigetpost.api_get_post(message["report"]["post_link"])
                if post_data is None or post_data is False:
                    return
                if datahandling.has_already_been_posted(post_data.site, post_data.post_id, post_data.title) \
                        and not datahandling.is_false_positive((post_data.post_id, post_data.site)):
                    return
                user = parsing.get_user_from_url(post_data.owner_url)
                if user is not None:
                    datahandling.add_blacklisted_user(user, "metasmoke", post_data.post_url)
                why = u"Post manually reported by user *{}* from metasmoke.\n".format(message["report"]["user"])
                postobj = classes.Post(api_response={'title': post_data.title, 'body': post_data.body,
                                                     'owner': {'display_name': post_data.owner_name,
                                                               'reputation': post_data.owner_rep,
                                                               'link': post_data.owner_url},
                                                     'site': post_data.site,
                                                     'is_answer': (post_data.post_type == "answer"),
                                                     'score': post_data.score, 'link': post_data.post_url,
                                                     'question_id': post_data.post_id,
                                                     'up_vote_count': post_data.up_vote_count,
                                                     'down_vote_count': post_data.down_vote_count})
                spamhandling.handle_spam(post=postobj,
                                         reasons=["Manually reported " + post_data.post_type],
                                         why=why)
            elif "deploy_updated" in message:
                sha = message["deploy_updated"]["head_commit"]["id"]
                if sha != os.popen('git log --pretty=format:"%H" -n 1').read():
                    if "autopull" in message["deploy_updated"]["head_commit"]["message"]:
                        if only_blacklists_changed(GitManager.get_remote_diff()):
                            commit_md = "[`{0}`](https://github.com/Charcoal-SE/SmokeDetector/commit/{0})" \
                                        .format(sha[:7])
                            i = []  # Currently no issues with blacklists
                            for bl_file in glob('bad_*.txt') + glob('blacklisted_*.txt'):  # Check blacklists for issues
                                with open(bl_file, 'r') as lines:
                                    seen = dict()
                                    for lineno, line in enumerate(lines, 1):
                                        if line.endswith('\r\n'):
                                            i.append("DOS line ending at `{0}:{1}` in {2}".format(bl_file, lineno,
                                                                                                  commit_md))
                                        if not line.endswith('\n'):
                                            i.append("No newline at end of `{0}` in {1}".format(bl_file, commit_md))
                                        if line == '\n':
                                            i.append("Blank line at `{0}:{1}` in {2}".format(bl_file, lineno,
                                                                                             commit_md))
                                        if line in seen:
                                            i.append("Duplicate entry of {0} at lines {1} and {2} of {3} in {4}"
                                                     .format(line.rstrip('\n'), seen[line], lineno, bl_file, commit_md))
                                        seen[line] = lineno
                            if i == []:  # No issues
                                GitManager.pull_remote()
                                load_blacklists()
                                chatcommunicate.tell_rooms_with("debug", "No code modified in {0}, only blacklists"
                                                                " reloaded.".format(commit_md))
                            else:
                                i.append("please fix before pulling.")
                                chatcommunicate.tell_rooms_with("debug", ", ".join(i))
            elif "commit_status" in message:
                c = message["commit_status"]
                sha = c["commit_sha"][:7]
                if c["commit_sha"] != os.popen('git log --pretty=format:"%H" -n 1').read():
                    if c["status"] == "success":
                        if "autopull" in c["commit_message"]:
                            s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/Charcoal-SE/SmokeDetector/" \
                                "commit/{commit_sha})"\
                                " succeeded. Message contains 'autopull', pulling...".format(ci_link=c["ci_url"],
                                                                                             commit_sha=sha)
                            chatcommunicate.tell_rooms_with("debug", s)
                            time.sleep(2)
                            os._exit(3)
                        else:
                            s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/Charcoal-SE/SmokeDetector/" \
                                "commit/{commit_sha}) succeeded.".format(ci_link=c["ci_url"], commit_sha=sha)

                            chatcommunicate.tell_rooms_with("debug", s)
                    elif c["status"] == "failure":
                        s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/Charcoal-SE/SmokeDetector/" \
                            "commit/{commit_sha}) failed.".format(ci_link=c["ci_url"], commit_sha=sha)

                        chatcommunicate.tell_rooms_with("debug", s)
            elif "everything_is_broken" in message:
                if message["everything_is_broken"] is True:
                    os._exit(6)
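
Both handle_websocket_data variants compare the incoming commit SHA against the local HEAD read through os.popen; note that git log -1 --pretty="%H" prints a trailing newline, so the result needs stripping before the comparison. A small sketch of the same check via subprocess (assumed equivalent, not the project's own helper):

import subprocess


def is_local_head(sha):
    # HEAD hash of the current checkout; strip the trailing newline before comparing.
    head = subprocess.check_output(["git", "log", "-1", "--pretty=%H"]).decode("utf-8").strip()
    return head == sha
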
def handle_commands(content_lower, message_parts, ev_room, ev_user_id, ev_user_name, wrap2, content):
    second_part_lower = "" if len(message_parts) < 2 else message_parts[1].lower()
    if re.compile(":[0-9]+").search(message_parts[0]):
        msg_id = int(message_parts[0][1:])
        msg = wrap2.get_message(msg_id)
        msg_content = msg.content_source
        quiet_action = ("-" in message_parts[1].lower())
        if str(msg.owner.id) != GlobalVars.smokeDetector_user_id[ev_room] or msg_content is None:
            return
        post_url = fetch_post_url_from_msg_content(msg_content)
        post_site_id = fetch_post_id_and_site_from_msg_content(msg_content)
        if post_site_id is not None:
            post_type = post_site_id[2]
        else:
            post_type = None
        if (second_part_lower.startswith("false") or second_part_lower.startswith("fp")) \
                and is_privileged(ev_room, ev_user_id, wrap2):
            if post_site_id is None:
                return "That message is not a report."

            t_metasmoke = Thread(target=Metasmoke.send_feedback_for_post,
                                 args=(post_url, second_part_lower, ev_user_name, ))
            t_metasmoke.start()

            add_false_positive((post_site_id[0], post_site_id[1]))
            user_added = False
            if message_parts[1].lower().startswith("falseu") or message_parts[1].lower().startswith("fpu"):
                url_from_msg = fetch_owner_url_from_msg_content(msg_content)
                if url_from_msg is not None:
                    user = get_user_from_url(url_from_msg)
                    if user is not None:
                        add_whitelisted_user(user)
                        user_added = True
            learned = False
            if post_type == "question":
                learned = bayesian_learn_title(fetch_title_from_msg_content(msg_content), "good")
                if learned and user_added and not quiet_action:
                    return "Registered question as false positive, whitelisted user and added title to Bayesian doctype 'good'."
                elif learned and not quiet_action:
                    return "Registered question as false positive and added title to Bayesian doctype 'good'."
                elif not learned:
                    return "Registered question as false positive, but could not add title to Bayesian doctype 'good'."
            elif post_type == "answer":
                if user_added and not quiet_action:
                    return "Registered answer as false positive and whitelisted user."
                elif not quiet_action:
                    return "Registered answer as false positive."
            try:
                msg.delete()
            except:
                pass
        if (second_part_lower.startswith("true") or second_part_lower.startswith("tp")) \
                and is_privileged(ev_room, ev_user_id, wrap2):
            if post_site_id is None:
                return "That message is not a report."

            t_metasmoke = Thread(target=Metasmoke.send_feedback_for_post,
                                 args=(post_url, second_part_lower, ev_user_name, ))
            t_metasmoke.start()

            learned = False
            user_added = False
            if message_parts[1].lower().startswith("trueu") or message_parts[1].lower().startswith("tpu"):
                url_from_msg = fetch_owner_url_from_msg_content(msg_content)
                if url_from_msg is not None:
                    user = get_user_from_url(url_from_msg)
                    if user is not None:
                        add_blacklisted_user(user)
                        user_added = True
            if post_type == "question":
                learned = bayesian_learn_title(fetch_title_from_msg_content(msg_content), "bad")
                if learned and user_added and not quiet_action:
                    return "Blacklisted user and registered question as true positive: added title to the Bayesian doctype 'bad'."
                elif learned and not quiet_action:
                    return "Registered question as true positive: added title to the Bayesian doctype 'bad'."
                elif not learned:
                    return "Something went wrong when registering question as true positive."
            elif post_type == "answer":
                if user_added and not quiet_action:
                    return "Blacklisted user."
                elif not user_added:
                    return "`true`/`tp` cannot be used for answers because their job is to add the title of the *question* to the Bayesian doctype 'bad'. If you want to blacklist the poster of the answer, use `trueu` or `tpu`."
        if second_part_lower.startswith("ignore") and is_privileged(ev_room, ev_user_id, wrap2):
            if post_site_id is None:
                return "That message is not a report."
            add_ignored_post(post_site_id[0:2])
            if not quiet_action:
                return "Post ignored; alerts about it will no longer be posted."
        if (second_part_lower.startswith("delete") or second_part_lower.startswith("remove") or second_part_lower.startswith("gone")
                or second_part_lower == "del") and is_privileged(ev_room, ev_user_id, wrap2):
            try:
                msg.delete()
            except:
                pass  # couldn't delete message
        if second_part_lower.startswith("why"):
            t = fetch_post_id_and_site_from_msg_content(msg_content)
            if t is None:
                return "That's not a report."
            post_id, site, _ = t
            why = get_why(site, post_id)
            if why is None or why == "":
                return "There is no `why` data for that post (anymore)."
            else:
                return why
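    # Hypothetical reply example: ":123456 fpu-" (a made-up message id) marks
    # the report as a false positive, whitelists the post owner, and the
    # trailing "-" (quiet_action) suppresses the confirmation message.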
    if content_lower.startswith("!!/addblu") \
            and is_privileged(ev_room, ev_user_id, wrap2):
        uid, val = get_user_from_list_command(content_lower)
        if uid > -1 and val != "":
            add_blacklisted_user((uid, val))
            return "User blacklisted (`{}` on `{}`).".format(uid, val)
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return "Invalid format. Valid format: `!!/addblu profileurl` *or* `!!/addblu userid sitename`."
    if content_lower.startswith("!!/rmblu") \
            and is_privileged(ev_room, ev_user_id, wrap2):
        uid, val = get_user_from_list_command(content_lower)
        if uid > -1 and val != "":
            if remove_blacklisted_user((uid, val)):
                return "User removed from blacklist (`{}` on `{}`).".format(uid, val)
            else:
                return "User is not blacklisted."
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return "Invalid format. Valid format: `!!/rmblu profileurl` *or* `!!/rmblu userid sitename`."
    if content_lower.startswith("!!/isblu"):
        uid, val = get_user_from_list_command(content_lower)
        if uid > -1 and val != "":
            if is_blacklisted_user((uid, val)):
                return "User is blacklisted. (`{}` on `{}`).".format(uid, val)
            else:
                return "User is not blacklisted. (`{}` on `{}`).".format(uid, val)
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return "Invalid format. Valid format: `!!/isblu profileurl` *or* `!!/isblu userid sitename`."
    if content_lower.startswith("!!/addwlu") \
            and is_privileged(ev_room, ev_user_id, wrap2):
        uid, val = get_user_from_list_command(content_lower)
        if uid > -1 and val != "":
            add_whitelisted_user((uid, val))
            return "User whitelisted (`{}` on `{}`).".format(uid, val)
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return "Invalid format. Valid format: `!!/addwlu profileurl` *or* `!!/addwlu userid sitename`."
    if content_lower.startswith("!!/rmwlu") \
            and is_privileged(ev_room, ev_user_id, wrap2):
        uid, val = get_user_from_list_command(content_lower)
        if uid > -1 and val != "":  # match the other handlers; -2 is the error case
            if remove_whitelisted_user((uid, val)):
                return "User removed from whitelist (`{}` on `{}`).".format(uid, val)
            else:
                return "User is not whitelisted."
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return "Invalid format. Valid format: `!!/rmwlu profileurl` *or* `!!/rmwlu userid sitename`."
    if content_lower.startswith("!!/iswlu"):
        uid, val = get_user_from_list_command(content_lower)
        if uid > -1 and val != "":
            if is_whitelisted_user((uid, val)):
                return "User is whitelisted. (`{}` on `{}`).".format(uid, val)
            else:
                return "User is not whitelisted. (`{}` on `{}`).".format(uid, val)
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return "Invalid format. Valid format: `!!/iswlu profileurl` *or* `!!/iswlu userid sitename`."
    if content_lower.startswith("!!/report") \
            and is_privileged(ev_room, ev_user_id, wrap2):
        if len(message_parts) < 2:
            return "Not enough arguments."
        url = message_parts[1]
        post_data = api_get_post(url)
        if post_data is None:
            return "That does not look like a valid post URL."
        if post_data is False:
            return "Could not find data for this post in the API. Check whether the post is not deleted yet."
        user = get_user_from_url(post_data.owner_url)
        if user is not None:
            add_blacklisted_user(user)
        bayesian_learn_title(post_data.title, "bad")
        handle_spam(post_data.title, post_data.body, post_data.owner_name, post_data.site, post_data.post_url,
                    post_data.owner_url, post_data.post_id, ["Manually reported " + post_data.post_type],
                    post_data.post_type == "answer")
    if content_lower.startswith("!!/wut"):
        return "Whaddya mean, 'wut'? Humans..."
    if content_lower.startswith("!!/lick"):
        return "*licks ice cream cone*"
    if content_lower.startswith("!!/alive"):
        if ev_room == GlobalVars.charcoal_room_id:
            return 'Of course'
        elif ev_room == GlobalVars.meta_tavern_room_id or ev_room == GlobalVars.socvr_room_id:
            return random.choice(['Yup', 'You doubt me?', 'Of course', '... did I miss something?',
                                  'plz send teh coffee',
                                  'Watching this endless list of new questions *never* gets boring',
                                  'Kinda sorta'])
    if content_lower.startswith("!!/rev"):
            return '[' + \
                GlobalVars.commit_with_author + \
                '](https://github.com/Charcoal-SE/SmokeDetector/commit/' + \
                GlobalVars.commit + \
                ')'
    if content_lower.startswith("!!/status"):
            now = datetime.utcnow()
            diff = now - UtcDate.startup_utc_date
            minutes, remainder = divmod(diff.seconds, 60)
            minutestr = "minutes" if minutes != 1 else "minute"
            return 'Running since {} UTC ({} {})'.format(GlobalVars.startup_utc, minutes, minutestr)
    if content_lower.startswith("!!/reboot"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            post_message_in_room(ev_room, "Goodbye, cruel world")
            os._exit(5)
    if content_lower.startswith("!!/stappit"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            post_message_in_room(ev_room, "Goodbye, cruel world")
            os._exit(6)
    if content_lower.startswith("!!/master"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            os._exit(8)
    if content_lower.startswith("!!/clearbl"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            if os.path.isfile("blacklistedUsers.txt"):
                os.remove("blacklistedUsers.txt")
                GlobalVars.blacklisted_users = []
                return "Kaboom, blacklisted users cleared."
            else:
                return "There are no blacklisted users at the moment."
    if content_lower.startswith("!!/block"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            timeToBlock = content_lower[9:].strip()
            timeToBlock = int(timeToBlock) if timeToBlock else 0
            if 0 < timeToBlock < 14400:
                GlobalVars.blockedTime = time.time() + timeToBlock
            else:
                GlobalVars.blockedTime = time.time() + 900
            return "blocked"
    if content_lower.startswith("!!/unblock"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            GlobalVars.blockedTime = time.time()
            return "unblocked"
    if content_lower.startswith("!!/errorlogs"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            count = -1
            if len(message_parts) != 2:
                return "The !!/errorlogs command requires 1 argument."
            try:
                count = int(message_parts[1])
            except ValueError:
                pass
            if count == -1:
                return "Invalid argument."
            logs_part = fetch_lines_from_error_log(count)
            post_message_in_room(ev_room, logs_part, False)
    if content_lower.startswith("!!/pull"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            r = requests.get('https://api.github.com/repos/Charcoal-SE/SmokeDetector/git/refs/heads/master')
            latest_sha = r.json()["object"]["sha"]
            r = requests.get('https://api.github.com/repos/Charcoal-SE/SmokeDetector/commits/' + latest_sha + '/statuses')
            states = []
            for status in r.json():
                state = status["state"]
                states.append(state)
            if "success" in states:
                os._exit(3)
            elif "error" in states or "failure" in states:
                return "CI build failed! :( Please check your commit."
            elif "pending" in states or not states:
                return "CI build is still pending, wait until the build has finished and then pull again."
    if content_lower.startswith("!!/help"):
        return "I'm [SmokeDetector](https://github.com/Charcoal-SE/SmokeDetector), a bot that detects spam and low-quality posts on the network and posts alerts to chat. [A command list is available here](https://github.com/Charcoal-SE/SmokeDetector/wiki/Commands)."
    if content_lower.startswith("!!/apiquota"):
        return str(GlobalVars.apiquota)  # ensure a string is returned to chat
    if content_lower.startswith("!!/whoami"):
        if (ev_room in GlobalVars.smokeDetector_user_id):
            return "My id for this room is {}".format(GlobalVars.smokeDetector_user_id[ev_room])
        else:
            return "I don't know my user ID for this room. (Something is wrong, and it's apnorton's fault.)"
    if content_lower.startswith("!!/location"):
        return GlobalVars.location
    if content_lower.startswith("!!/queuestatus"):
        post_message_in_room(ev_room, GlobalVars.bodyfetcher.print_queue(), False)
    if content_lower.startswith("!!/blame") and ev_room == GlobalVars.meta_tavern_room_id:
        GlobalVars.tavern_users_chatting = list(set(GlobalVars.tavern_users_chatting))  # Make unique
        user_to_blame = random.choice(GlobalVars.tavern_users_chatting)
        return "It's " + user_to_blame + "'s fault."
    if "smokedetector" in content_lower and "fault" in content_lower and ("xkcdbot" in ev_user_name.lower() or "bjb568" in ev_user_name.lower()):
        return "Liar"
    if content_lower.startswith("!!/coffee"):
        return "*brews coffee for @" + ev_user_name.replace(" ", "") + "*"
    if content_lower.startswith("!!/tea"):
        return "*brews a cup of " + random.choice(['earl grey', 'green', 'chamomile', 'lemon', 'darjeeling', 'mint']) + " tea for @" + ev_user_name.replace(" ", "") + "*"
    if content_lower.startswith("!!/test"):
        string_to_test = content[8:]
        if len(string_to_test) == 0:
            return "Nothing to test"
        result = "> "
        reasons, why = FindSpam.test_post(string_to_test, string_to_test, string_to_test, "", False, False)
        if len(reasons) == 0:
            result += "Would not be caught for title, body and username."
            return result
        result += ", ".join(reasons).capitalize()
        if why is not None and len(why) > 0:
            result += "\n----------\n"
            result += why
        return result
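    # !!/test feeds the same string in as title, body and username;
    # FindSpam.test_post returns the matched reasons plus an optional "why"
    # explanation, which is appended below a divider.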

    return None
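
# Next is an older handle_websocket_data revision: it dispatches metasmoke
# websocket payloads (message, blacklist, naa, fp, report, commit_status) and,
# for manual reports, calls spamhandling.handle_spam with the long keyword
# form (title/body/poster/site/... passed individually).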
    def handle_websocket_data(data):
        if "message" not in data:
            return

        message = data['message']
        if isinstance(message, Iterable):
            if "message" in message:
                GlobalVars.charcoal_hq.send_message(message['message'])
            elif "blacklist" in message:
                datahandling.add_blacklisted_user((message['blacklist']['uid'], message['blacklist']['site']),
                                                  "metasmoke", message['blacklist']['post'])
            elif "naa" in message:
                post_site_id = parsing.fetch_post_id_and_site_from_url(message["naa"]["post_link"])
                datahandling.add_ignored_post(post_site_id[0:2])
            elif "fp" in message:
                post_site_id = parsing.fetch_post_id_and_site_from_url(message["fp"]["post_link"])
                datahandling.add_false_positive(post_site_id[0:2])
            elif "report" in message:
                post_data = apigetpost.api_get_post(message["report"]["post_link"])
                if post_data is None or post_data is False:
                    return
                if datahandling.has_already_been_posted(post_data.site, post_data.post_id, post_data.title) \
                        and not datahandling.is_false_positive((post_data.post_id, post_data.site)):
                    return
                user = parsing.get_user_from_url(post_data.owner_url)
                if user is not None:
                    datahandling.add_blacklisted_user(user, "metasmoke", post_data.post_url)
                why = u"Post manually reported by user *{}* from metasmoke.\n".format(message["report"]["user"])
                spamhandling.handle_spam(title=post_data.title,
                                         body=post_data.body,
                                         poster=post_data.owner_name,
                                         site=post_data.site,
                                         post_url=post_data.post_url,
                                         poster_url=post_data.owner_url,
                                         post_id=post_data.post_id,
                                         reasons=["Manually reported " + post_data.post_type],
                                         is_answer=post_data.post_type == "answer",
                                         why=why,
                                         owner_rep=post_data.owner_rep,
                                         post_score=post_data.score,
                                         up_vote_count=post_data.up_vote_count,
                                         down_vote_count=post_data.down_vote_count,
                                         question_id=post_data.question_id)
            elif "commit_status" in message:
                c = message["commit_status"]
                sha = c["commit_sha"][:7]
                if c["commit_sha"] != os.popen('git log --pretty=format:"%H" -n 1').read():
                    if c["status"] == "success":
                        if "autopull" in c["commit_message"]:
                            s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/Charcoal-SE/SmokeDetector/" \
                                "commit/{commit_sha})"\
                                " succeeded. Message contains 'autopull', pulling...".format(ci_link=c["ci_url"],
                                                                                             commit_sha=sha)
                            GlobalVars.charcoal_hq.send_message(s)
                            time.sleep(2)
                            os._exit(3)
                        else:
                            s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/Charcoal-SE/SmokeDetector/" \
                                "commit/{commit_sha}) succeeded.".format(ci_link=c["ci_url"], commit_sha=sha)
                    elif c["status"] == "failure":
                        s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/Charcoal-SE/SmokeDetector/" \
                            "commit/{commit_sha}) failed.".format(ci_link=c["ci_url"], commit_sha=sha)

                    # noinspection PyUnboundLocalVariable
                    GlobalVars.charcoal_hq.send_message(s)
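
# A later revision of the same dispatcher: it adds an "exit" action and wraps
# the fetched post data in a classes.Post before calling
# spamhandling.handle_spam(post=..., reasons=..., why=...).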
    def handle_websocket_data(data):
        if "message" not in data:
            return

        message = data['message']
        if isinstance(message, Iterable):
            if "message" in message:
                GlobalVars.charcoal_hq.send_message(message['message'])
            elif "exit" in message:
                os._exit(message["exit"])
            elif "blacklist" in message:
                datahandling.add_blacklisted_user((message['blacklist']['uid'], message['blacklist']['site']),
                                                  "metasmoke", message['blacklist']['post'])
            elif "naa" in message:
                post_site_id = parsing.fetch_post_id_and_site_from_url(message["naa"]["post_link"])
                datahandling.add_ignored_post(post_site_id[0:2])
            elif "fp" in message:
                post_site_id = parsing.fetch_post_id_and_site_from_url(message["fp"]["post_link"])
                datahandling.add_false_positive(post_site_id[0:2])
            elif "report" in message:
                post_data = apigetpost.api_get_post(message["report"]["post_link"])
                if post_data is None or post_data is False:
                    return
                if datahandling.has_already_been_posted(post_data.site, post_data.post_id, post_data.title) \
                        and not datahandling.is_false_positive((post_data.post_id, post_data.site)):
                    return
                user = parsing.get_user_from_url(post_data.owner_url)
                if user is not None:
                    datahandling.add_blacklisted_user(user, "metasmoke", post_data.post_url)
                why = u"Post manually reported by user *{}* from metasmoke.\n".format(message["report"]["user"])
                postobj = classes.Post(api_response={'title': post_data.title, 'body': post_data.body,
                                                     'owner': {'display_name': post_data.owner_name,
                                                               'reputation': post_data.owner_rep,
                                                               'link': post_data.owner_url},
                                                     'site': post_data.site,
                                                     'IsAnswer': (post_data.post_type == "answer"),
                                                     'score': post_data.score, 'link': post_data.post_url,
                                                     'question_id': post_data.post_id,
                                                     'up_vote_count': post_data.up_vote_count,
                                                     'down_vote_count': post_data.down_vote_count})
                spamhandling.handle_spam(post=postobj,
                                         reasons=["Manually reported " + post_data.post_type],
                                         why=why)
            elif "commit_status" in message:
                c = message["commit_status"]
                sha = c["commit_sha"][:7]
                if c["commit_sha"] != os.popen('git log --pretty=format:"%H" -n 1').read():
                    if c["status"] == "success":
                        if "autopull" in c["commit_message"]:
                            s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/Charcoal-SE/SmokeDetector/" \
                                "commit/{commit_sha})"\
                                " succeeded. Message contains 'autopull', pulling...".format(ci_link=c["ci_url"],
                                                                                             commit_sha=sha)
                            GlobalVars.charcoal_hq.send_message(s)
                            time.sleep(2)
                            os._exit(3)
                        else:
                            s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/Charcoal-SE/SmokeDetector/" \
                                "commit/{commit_sha}) succeeded.".format(ci_link=c["ci_url"], commit_sha=sha)
                    elif c["status"] == "failure":
                        s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/Charcoal-SE/SmokeDetector/" \
                            "commit/{commit_sha}) failed.".format(ci_link=c["ci_url"], commit_sha=sha)

                    # noinspection PyUnboundLocalVariable
                    GlobalVars.charcoal_hq.send_message(s)
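
# Rough sketch of the two handle_spam call shapes seen in these examples
# (argument names come from the snippets on this page, not from a verified
# current API):
#
#     # older form: every field passed individually
#     handle_spam(title, body, owner_name, site, link, owner_link, post_id,
#                 reason, is_answer, why)
#
#     # newer form: a Post object built from the raw API response
#     post = classes.Post(api_response=api_item)
#     handle_spam(post=post, reasons=["Manually reported question"], why=why)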
Exemple #18
0
def allspam(msg, url):
    """
    Reports all of a user's posts as spam
    :param msg:
    :param url: A user profile URL
    :return:
    """
    crn, wait = can_report_now(msg.owner.id, msg._client.host)
    if not crn:
        raise CmdException("You can execute the !!/allspam command again in {} seconds. "
                           "To avoid one user sending lots of reports in a few commands and "
                           "slowing SmokeDetector down due to rate-limiting, you have to "
                           "wait 30 seconds after you've reported multiple posts in "
                           "one go.".format(wait))
    user = get_user_from_url(url)
    if user is None:
        raise CmdException("That doesn't look like a valid user URL.")
    user_sites = []
    user_posts = []
    # Detect whether link is to network profile or site profile
    if user[1] == 'stackexchange.com':
        # Respect backoffs etc
        GlobalVars.api_request_lock.acquire()
        if GlobalVars.api_backoff_time > time.time():
            time.sleep(GlobalVars.api_backoff_time - time.time() + 2)
        # Fetch sites
        api_filter = "!6Pbp)--cWmv(1"
        request_url = "http://api.stackexchange.com/2.2/users/{}/associated?filter={}&key=IAkbitmze4B8KpacUfLqkw((" \
            .format(user[0], api_filter)
        res = requests.get(request_url).json()
        if "backoff" in res:
            if GlobalVars.api_backoff_time < time.time() + res["backoff"]:
                GlobalVars.api_backoff_time = time.time() + res["backoff"]
        GlobalVars.api_request_lock.release()
        if 'items' not in res or len(res['items']) == 0:
            raise CmdException("The specified user does not appear to exist.")
        if res['has_more']:
            raise CmdException("The specified user has an abnormally high number of accounts. Please consider flagging "
                               "for moderator attention, otherwise use !!/report on the user's posts individually.")
        # Add accounts with posts
        for site in res['items']:
            if site['question_count'] > 0 or site['answer_count'] > 0:
                user_sites.append((site['user_id'], get_api_sitename_from_url(site['site_url'])))
    else:
        user_sites.append((user[0], get_api_sitename_from_url(user[1])))
    # Fetch posts
    for u_id, u_site in user_sites:
        # Respect backoffs etc
        GlobalVars.api_request_lock.acquire()
        if GlobalVars.api_backoff_time > time.time():
            time.sleep(GlobalVars.api_backoff_time - time.time() + 2)
        # Fetch posts
        api_filter = "!)Q4RrMH0DC96Y4g9yVzuwUrW"
        request_url = "http://api.stackexchange.com/2.2/users/{}/posts?site={}&filter={}&key=IAkbitmze4B8KpacUfLqkw((" \
            .format(u_id, u_site, api_filter)
        res = requests.get(request_url).json()
        if "backoff" in res:
            if GlobalVars.api_backoff_time < time.time() + res["backoff"]:
                GlobalVars.api_backoff_time = time.time() + res["backoff"]
        GlobalVars.api_request_lock.release()
        if 'items' not in res or len(res['items']) == 0:
            raise CmdException("The specified user has no posts on this site.")
        posts = res['items']
        if posts[0]['owner']['reputation'] > 100:
            raise CmdException("The specified user's reputation is abnormally high. Please consider flagging for "
                               "moderator attention, otherwise use !!/report on the posts individually.")
        # Add blacklisted user - use most downvoted post as post URL
        message_url = "https://chat.{}/transcript/{}?m={}".format(msg._client.host, msg.room.id, msg.id)
        add_blacklisted_user(user, message_url, sorted(posts, key=lambda x: x['score'])[0]['owner']['link'])
        # TODO: Postdata refactor, figure out a better way to use apigetpost
        for post in posts:
            post_data = PostData()
            post_data.post_id = post['post_id']
            post_data.post_url = url_to_shortlink(post['link'])
            *discard, post_data.site, post_data.post_type = fetch_post_id_and_site_from_url(
                url_to_shortlink(post['link']))
            post_data.title = unescape(post['title'])
            post_data.owner_name = unescape(post['owner']['display_name'])
            post_data.owner_url = post['owner']['link']
            post_data.owner_rep = post['owner']['reputation']
            post_data.body = post['body']
            post_data.score = post['score']
            post_data.up_vote_count = post['up_vote_count']
            post_data.down_vote_count = post['down_vote_count']
            if post_data.post_type == "answer":
                # Annoyingly we have to make another request to get the question ID, since it is only returned by the
                # /answers route
                # Respect backoffs etc
                GlobalVars.api_request_lock.acquire()
                if GlobalVars.api_backoff_time > time.time():
                    time.sleep(GlobalVars.api_backoff_time - time.time() + 2)
                # Fetch posts
                filter = "!*Jxb9s5EOrE51WK*"
                req_url = "http://api.stackexchange.com/2.2/answers/{}?site={}&filter={}&key=IAkbitmze4B8KpacUfLqkw((" \
                    .format(post['post_id'], u_site, filter)
                answer_res = requests.get(req_url).json()
                if "backoff" in res:
                    if GlobalVars.api_backoff_time < time.time() + res["backoff"]:
                        GlobalVars.api_backoff_time = time.time() + res["backoff"]
                GlobalVars.api_request_lock.release()
                # Finally, set the attribute
                post_data.question_id = answer_res['items'][0]['question_id']
                post_data.is_answer = True
            user_posts.append(post_data)
    if len(user_posts) == 0:
        raise CmdException("The specified user hasn't posted anything.")
    if len(user_posts) > 15:
        raise CmdException("The specified user has an abnormally high number of spam posts. Please consider flagging "
                           "for moderator attention, otherwise use !!/report on the posts individually.")
    why_info = u"User manually reported by *{}* in room *{}*.\n".format(msg.owner.name, msg.room.name)
    # Handle all posts
    for index, post in enumerate(user_posts, start=1):
        batch = ""
        if len(user_posts) > 1:
            batch = " (batch report: post {} out of {})".format(index, len(user_posts))
        handle_spam(post=Post(api_response=post.as_dict),
                    reasons=["Manually reported " + post.post_type + batch],
                    why=why_info)
        time.sleep(2)  # Should this be implemented differently?
    if len(user_posts) > 2:
        add_or_update_multiple_reporter(msg.owner.id, msg._client.host, time.time())
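
# Hypothetical invocation of allspam, assuming a ChatExchange-style `msg`
# object with .owner, .room and ._client as used above:
#
#     allspam(msg, "https://stackexchange.com/users/0000000/example")
#
# The make_api_call_for_site revision that follows adds the Stack Overflow
# "recently active questions" workaround and a quota_remaining guard.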
    def make_api_call_for_site(self, site):
        posts = self.queue.pop(site)
        if site == "stackoverflow.com":
            # Not all SO questions are shown in the realtime feed. We now
            # fetch all recently modified SO questions to work around that.
            min_query = ""
            if self.last_activity_date != 0:
                min_query = "&min=" + str(self.last_activity_date)
                pagesize = "50"
            else:
                pagesize = "25"
            url = "http://api.stackexchange.com/2.2/questions?site=stackoverflow&filter=!4y_-sca-)pfAwlmP_1FxC6e5yzutRIcQvonAiP&key=IAkbitmze4B8KpacUfLqkw((&pagesize=" + pagesize + min_query
        else:
            url = "http://api.stackexchange.com/2.2/questions/" + ";".join(
                str(x) for x in posts
            ) + "?site=" + site + "&filter=!4y_-sca-)pfAwlmP_1FxC6e5yzutRIcQvonAiP&key=IAkbitmze4B8KpacUfLqkw(("
        # wait to make sure API has/updates post data
        time.sleep(60)
        try:
            response = requests.get(url, timeout=20).json()
        except requests.exceptions.Timeout:
            return  # could add some retrying logic here, but eh.

        if "quota_remaining" in response:
            GlobalVars.apiquota = response["quota_remaining"]
        else:
            GlobalVars.apiquota = 0
            return

        if site == "stackoverflow.com":
            if len(response["items"]
                   ) > 0 and "last_activity_date" in response["items"][0]:
                self.last_activity_date = response["items"][0][
                    "last_activity_date"]

        for post in response["items"]:
            if "title" not in post or "body" not in post:
                continue
            title = GlobalVars.parser.unescape(post["title"])
            body = GlobalVars.parser.unescape(post["body"])
            link = post["link"]
            try:
                owner_name = GlobalVars.parser.unescape(
                    post["owner"]["display_name"])
                owner_link = post["owner"]["link"]
                owner_rep = post["owner"]["reputation"]
            except:
                owner_name = ""
                owner_link = ""
                owner_rep = 0
            q_id = str(post["question_id"])

            if owner_rep <= 50:
                is_spam, reason, why = check_if_spam(title, body, owner_name,
                                                     owner_link, site, q_id,
                                                     False, False)
                if is_spam:
                    try:
                        handle_spam(title, body, owner_name, site, link,
                                    owner_link, q_id, reason, False, why)
                    except:
                        print "NOP"

            classified, gibberish_score = classify_gibberish(body, site)
            if classified and gibberish_score >= 65:
                GlobalVars.bayesian_testroom.send_message(
                    "[ SmokeDetector | GibberishClassifierBeta ] "
                    u"Potential gibberish body ({}%): [{}]({}) on `{}`".format(
                        gibberish_score, title, link, site))
            try:
                for answer in post["answers"]:
                    answer_title = ""
                    body = answer["body"]
                    print "got answer from owner with name " + owner_name
                    link = answer["link"]
                    a_id = str(answer["answer_id"])
                    try:
                        owner_name = GlobalVars.parser.unescape(
                            answer["owner"]["display_name"])
                        owner_link = answer["owner"]["link"]
                        owner_rep = answer["owner"]["reputation"]
                    except:
                        owner_name = ""
                        owner_link = ""
                        owner_rep = 0

                    if owner_rep <= 50:
                        is_spam, reason, why = check_if_spam(
                            answer_title, body, owner_name, owner_link, site,
                            a_id, True, False)
                        if is_spam:
                            try:
                                handle_spam(title, body, owner_name, site,
                                            link, owner_link, a_id, reason,
                                            True, why)
                            except:
                                print "NOP"

                    classified, gibberish_score = classify_gibberish(
                        body, site)
                    if classified and gibberish_score >= 65:
                        GlobalVars.bayesian_testroom.send_message(
                            "[ SmokeDetector | GibberishClassifierBeta ] "
                            u"Potential gibberish answer ({}%): [{}]({}) on `{}`"
                            .format(gibberish_score, title, link, site))
            except:
                print "no answers"
        return
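
# Standalone sketch of the question-fetch call used above. It hits the public
# Stack Exchange API /2.2/questions endpoint; the filter and key strings from
# the example are omitted, so only the default fields come back.
import requests


def fetch_questions(site, question_ids):
    url = "https://api.stackexchange.com/2.2/questions/{}".format(
        ";".join(str(q) for q in question_ids))
    response = requests.get(url, params={"site": site}, timeout=20).json()
    if "backoff" in response:
        # the API asks callers to wait this many seconds before the next call
        print("backoff requested: {} seconds".format(response["backoff"]))
    return response.get("items", [])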
Exemple #20
0
def report(msg, urls):
    """
    Report a post (or posts)
    :param msg:
    :param urls:
    :return: A string (or None)
    """
    crn, wait = can_report_now(msg.owner.id, msg._client.host)
    if not crn:
        raise CmdException("You can execute the !!/report command again in {} seconds. "
                           "To avoid one user sending lots of reports in a few commands and "
                           "slowing SmokeDetector down due to rate-limiting, you have to "
                           "wait 30 seconds after you've reported multiple posts in "
                           "one go.".format(wait))

    output = []
    urls = list(set(urls.split()))

    if len(urls) > 5:
        raise CmdException("To avoid SmokeDetector reporting posts too slowly, you can "
                           "report at most 5 posts at a time. This is to avoid "
                           "SmokeDetector's chat messages getting rate-limited too much, "
                           "which would slow down reports.")

    for index, url in enumerate(urls, start=1):
        post_data = api_get_post(url)

        if post_data is None:
            output.append("Post {}: That does not look like a valid post URL.".format(index))
            continue

        if post_data is False:
            output.append("Post {}: Could not find data for this post in the API. "
                          "It may already have been deleted.".format(index))
            continue

        if has_already_been_posted(post_data.site, post_data.post_id, post_data.title) and not is_false_positive(
                (post_data.post_id, post_data.site)):
            # Don't re-report if the post wasn't marked as a false positive. If it was marked as a false positive,
            # this re-report might be attempting to correct that/fix a mistake/etc.

            if GlobalVars.metasmoke_key is not None:
                se_link = to_protocol_relative(post_data.post_url)
                ms_link = "https://m.erwaysoftware.com/posts/by-url?url={}".format(se_link)
                output.append("Post {}: Already recently reported [ [MS]({}) ]".format(index, ms_link))
                continue
            else:
                output.append("Post {}: Already recently reported".format(index))
                continue

        post_data.is_answer = (post_data.post_type == "answer")
        post = Post(api_response=post_data.as_dict)
        user = get_user_from_url(post_data.owner_url)

        if user is not None:
            message_url = "https://chat.{}/transcript/{}?m={}".format(msg._client.host, msg.room.id, msg.id)
            add_blacklisted_user(user, message_url, post_data.post_url)

        why_info = u"Post manually reported by user *{}* in room *{}*.\n".format(msg.owner.name, msg.room.name)
        batch = ""
        if len(urls) > 1:
            batch = " (batch report: post {} out of {})".format(index, len(urls))

        handle_spam(post=post,
                    reasons=["Manually reported " + post_data.post_type + batch],
                    why=why_info)

    if len(urls) > 1 and len(urls) > len(output):
        add_or_update_multiple_reporter(msg.owner.id, msg._client.host, time.time())

    if len(output) > 0:
        return os.linesep.join(output)
def command_report_post(ev_room, ev_user_id, wrap2, message_parts, message_url,
                        ev_user_name, ev_room_name, *args, **kwargs):
    """
    Report a post (or posts)
    :param ev_room_name:
    :param ev_user_name:
    :param message_url:
    :param message_parts:
    :param wrap2:
    :param ev_user_id:
    :param ev_room:
    :param kwargs: No additional arguments expected
    :return: A string (or None)
    """
    crn, wait = can_report_now(ev_user_id, wrap2.host)
    if not crn:
        return Response(command_status=False, message="You can execute the !!/report command again in {} seconds. "
                                                      "To avoid one user sending lots of reports in a few commands and "
                                                      "slowing SmokeDetector down due to rate-limiting, you have to "
                                                      "wait 30 seconds after you've reported multiple posts using "
                                                      "!!/report, even if your current command just has one URL. (Note "
                                                      "that this timeout won't be applied if you only used !!/report "
                                                      "for one post)".format(wait))
    if len(message_parts) < 2:
        return Response(command_status=False, message="Not enough arguments.")
    output = []
    index = 0
    urls = list(set(message_parts[1:]))
    if len(urls) > 5:
        return Response(command_status=False, message="To avoid SmokeDetector reporting posts too slowly, you can "
                                                      "report at most 5 posts at a time. This is to avoid "
                                                      "SmokeDetector's chat messages getting rate-limited too much, "
                                                      "which would slow down reports.")
    for url in urls:
        index += 1
        post_data = api_get_post(url)
        if post_data is None:
            output.append("Post {}: That does not look like a valid post URL.".format(index))
            continue
        if post_data is False:
            output.append("Post {}: Could not find data for this post in the API. "
                          "It may already have been deleted.".format(index))
            continue
        user = get_user_from_url(post_data.owner_url)
        if user is not None:
            add_blacklisted_user(user, message_url, post_data.post_url)
        why = u"Post manually reported by user *{}* in room *{}*.\n".format(ev_user_name,
                                                                            ev_room_name.decode('utf-8'))
        batch = ""
        if len(urls) > 1:
            batch = " (batch report: post {} out of {})".format(index, len(urls))
        handle_spam(title=post_data.title,
                    body=post_data.body,
                    poster=post_data.owner_name,
                    site=post_data.site,
                    post_url=post_data.post_url,
                    poster_url=post_data.owner_url,
                    post_id=post_data.post_id,
                    reasons=["Manually reported " + post_data.post_type + batch],
                    is_answer=post_data.post_type == "answer",
                    why=why,
                    owner_rep=post_data.owner_rep,
                    post_score=post_data.score,
                    up_vote_count=post_data.up_vote_count,
                    down_vote_count=post_data.down_vote_count,
                    question_id=post_data.question_id)
    if len(urls) > 1 and len(urls) > len(output):
        add_or_update_multiple_reporter(ev_user_id, wrap2.host, time.time())
    if len(output) > 0:
        return Response(command_status=True, message=os.linesep.join(output))
    return Response(command_status=True, message=None)
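
# The make_api_call_for_site revision below adds queue locking, per-site API
# call accounting, quota-rollover reporting, and handling of the API's
# backoff and error_message fields before processing the returned items.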
    def make_api_call_for_site(self, site):
        self.queue_modify_lock.acquire()
        if site not in self.queue:
            GlobalVars.charcoal_hq.send_message(
                "Attempted API call to {} but there are no posts to fetch.".
                format(site))
            return
        posts = self.queue.pop(site)
        store_bodyfetcher_queue()
        self.queue_modify_lock.release()

        question_modifier = ""
        pagesize_modifier = ""

        if site == "stackoverflow.com":
            # Not all SO questions are shown in the realtime feed. We now
            # fetch all recently modified SO questions to work around that.
            if self.last_activity_date != 0:
                pagesize = "50"
            else:
                pagesize = "25"

            pagesize_modifier = "&pagesize={pagesize}&min={time_length}".format(
                pagesize=pagesize, time_length=str(self.last_activity_date))
        else:
            question_modifier = "/{0}".format(";".join(
                str(post) for post in posts))

        url = "http://api.stackexchange.com/2.2/questions{q_modifier}?site={site}&filter=!)E0g*ODaEZ(SgULQhYvCYbu09*ss(bKFdnTrGmGUxnqPptuHP&key=IAkbitmze4B8KpacUfLqkw(({optional_min_query_param}".format(
            q_modifier=question_modifier,
            site=site,
            optional_min_query_param=pagesize_modifier)

        # wait to make sure API has/updates post data
        time.sleep(3)
        # Respect backoff, if we were given one
        if GlobalVars.api_backoff_time > time.time():
            time.sleep(GlobalVars.api_backoff_time - time.time() + 2)
        try:
            response = requests.get(url, timeout=20).json()
        except requests.exceptions.Timeout:
            return  # could add some retrying logic here, but eh.

        self.api_data_lock.acquire()
        add_or_update_api_data(site)
        self.api_data_lock.release()

        message_hq = ""
        if "quota_remaining" in response:
            if response[
                    "quota_remaining"] - GlobalVars.apiquota >= 5000 and GlobalVars.apiquota >= 0:
                GlobalVars.charcoal_hq.send_message(
                    "API quota rolled over with {0} requests remaining. Current quota: {1}."
                    .format(GlobalVars.apiquota, response["quota_remaining"]))
                sorted_calls_per_site = sorted(
                    GlobalVars.api_calls_per_site.items(),
                    key=itemgetter(1),
                    reverse=True)
                api_quota_used_per_site = ""
                for site_name, quota_used in sorted_calls_per_site:
                    api_quota_used_per_site += site_name.replace(
                        '.com', '').replace('.stackexchange',
                                            '') + ": {0}\n".format(
                                                str(quota_used))
                api_quota_used_per_site = api_quota_used_per_site.strip()
                GlobalVars.charcoal_hq.send_message(api_quota_used_per_site,
                                                    False)
                clear_api_data()
            if response["quota_remaining"] == 0:
                GlobalVars.charcoal_hq.send_message(
                    "API reports no quota left!  May be a glitch.")
                GlobalVars.charcoal_hq.send_message(
                    str(response))  # No code format for now?
            if GlobalVars.apiquota == -1:
                GlobalVars.charcoal_hq.send_message(
                    "Restart: API quota is {quota}.".format(
                        quota=response["quota_remaining"]))
            GlobalVars.apiquota = response["quota_remaining"]
        else:
            message_hq = "The quota_remaining property was not in the API response."

        if "error_message" in response:
            message_hq += " Error: {}.".format(response["error_message"])

        if "backoff" in response:
            if GlobalVars.api_backoff_time < time.time() + response["backoff"]:
                GlobalVars.api_backoff_time = time.time() + response["backoff"]
            match = regex.compile('/2.2/([^.]*)').search(url)
            url_part = match.group(1) if match else url
            message_hq += "\nBackoff received of {} seconds on request to `{}`".format(
                str(response["backoff"]), url_part)

        if len(message_hq) > 0:
            GlobalVars.charcoal_hq.send_message(message_hq.strip())

        if "items" not in response:
            return

        if site == "stackoverflow.com":
            items = response["items"]
            if len(items) > 0 and "last_activity_date" in items[0]:
                self.last_activity_date = items[0]["last_activity_date"]

        for post in response["items"]:
            if "title" not in post or "body" not in post:
                continue
            title = GlobalVars.parser.unescape(post["title"])
            body = GlobalVars.parser.unescape(post["body"])
            link = post["link"]
            post_score = post["score"]
            up_vote_count = post["up_vote_count"]
            down_vote_count = post["down_vote_count"]
            try:
                owner_name = GlobalVars.parser.unescape(
                    post["owner"]["display_name"])
                owner_link = post["owner"]["link"]
                owner_rep = post["owner"]["reputation"]
            except:
                owner_name = ""
                owner_link = ""
                owner_rep = 0
            q_id = str(post["question_id"])

            is_spam, reason, why = check_if_spam(title, body, owner_name,
                                                 owner_link, site, q_id, False,
                                                 False, owner_rep, post_score)
            if is_spam:
                try:
                    handle_spam(title, body, owner_name, site, link,
                                owner_link, q_id, reason, False, why,
                                owner_rep, post_score, up_vote_count,
                                down_vote_count, None)
                except:
                    print "NOP"
            try:
                for answer in post["answers"]:
                    answer_title = ""
                    body = answer["body"]
                    print "got answer from owner with name " + owner_name
                    link = answer["link"]
                    a_id = str(answer["answer_id"])
                    post_score = answer["score"]
                    up_vote_count = answer["up_vote_count"]
                    down_vote_count = answer["down_vote_count"]
                    try:
                        owner_name = GlobalVars.parser.unescape(
                            answer["owner"]["display_name"])
                        owner_link = answer["owner"]["link"]
                        owner_rep = answer["owner"]["reputation"]
                    except:
                        owner_name = ""
                        owner_link = ""
                        owner_rep = 0

                    is_spam, reason, why = check_if_spam(
                        answer_title, body, owner_name, owner_link, site, a_id,
                        True, False, owner_rep, post_score)
                    if is_spam:
                        try:
                            handle_spam(title, body, owner_name, site, link,
                                        owner_link, a_id, reason, True, why,
                                        owner_rep, post_score, up_vote_count,
                                        down_vote_count, q_id)
                        except:
                            print "NOP"
            except:
                print "no answers"
        return
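
# Minimal standalone sketch of the backoff handling used in the revision
# above, assuming the Stack Exchange API's documented "backoff" field (the
# number of seconds to wait before calling the same method again):
import time

import requests

_backoff_until = 0  # plays the role of GlobalVars.api_backoff_time


def api_get_with_backoff(url):
    global _backoff_until
    wait = _backoff_until - time.time()
    if wait > 0:
        time.sleep(wait + 2)  # pad the wait slightly, as the examples do
    response = requests.get(url, timeout=20).json()
    if "backoff" in response:
        _backoff_until = max(_backoff_until, time.time() + response["backoff"])
    return response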
Exemple #23
0
    def init_websocket(self):
        try:
            GlobalVars.metasmoke_ws = websocket.create_connection(GlobalVars.metasmoke_ws_host, origin=GlobalVars.metasmoke_host)
            GlobalVars.metasmoke_ws.send(json.dumps({"command": "subscribe", "identifier": "{\"channel\":\"SmokeDetectorChannel\",\"key\":\"" + GlobalVars.metasmoke_key + "\"}"}))

            while True:
                a = GlobalVars.metasmoke_ws.recv()
                print(a)
                try:
                    data = json.loads(a)
                    if "message" in data:
                        message = data['message']
                        if isinstance(message, Iterable):
                            if "message" in message:
                                GlobalVars.charcoal_hq.send_message(message['message'])
                            elif "blacklist" in message:
                                datahandling.add_blacklisted_user((message['blacklist']['uid'], message['blacklist']['site']), "metasmoke", message['blacklist']['post'])
                            elif "naa" in message:
                                post_site_id = parsing.fetch_post_id_and_site_from_url(message["naa"]["post_link"])
                                datahandling.add_ignored_post(post_site_id[0:2])
                            elif "fp" in message:
                                post_site_id = parsing.fetch_post_id_and_site_from_url(message["fp"]["post_link"])
                                datahandling.add_false_positive(post_site_id[0:2])
                            elif "report" in message:
                                post_data = apigetpost.api_get_post(message["report"]["post_link"])
                                if post_data is None or post_data is False:
                                    continue
                                if datahandling.has_already_been_posted(post_data.site, post_data.post_id, post_data.title) and not datahandling.is_false_positive((post_data.post_id, post_data.site)):
                                    continue
                                user = parsing.get_user_from_url(post_data.owner_url)
                                if user is not None:
                                    datahandling.add_blacklisted_user(user, "metasmoke", post_data.post_url)
                                why = u"Post manually reported by user *{}* from metasmoke.\n".format(message["report"]["user"])
                                spamhandling.handle_spam(title=post_data.title,
                                                         body=post_data.body,
                                                         poster=post_data.owner_name,
                                                         site=post_data.site,
                                                         post_url=post_data.post_url,
                                                         poster_url=post_data.owner_url,
                                                         post_id=post_data.post_id,
                                                         reasons=["Manually reported " + post_data.post_type],
                                                         is_answer=post_data.post_type == "answer",
                                                         why=why,
                                                         owner_rep=post_data.owner_rep,
                                                         post_score=post_data.score,
                                                         up_vote_count=post_data.up_vote_count,
                                                         down_vote_count=post_data.down_vote_count,
                                                         question_id=post_data.question_id)
                            elif "commit_status" in message:
                                c = message["commit_status"]
                                sha = c["commit_sha"][:7]
                                if c["commit_sha"] != os.popen('git log --pretty=format:"%H" -n 1').read():
                                    if c["status"] == "success":
                                        if "autopull" in c["commit_message"]:
                                            GlobalVars.charcoal_hq.send_message("[CI]({ci_link}) on {commit_sha} succeeded. Message contains 'autopull', pulling...".format(ci_link=c["ci_url"], commit_sha=sha))
                                            time.sleep(2)
                                            os._exit(3)
                                        else:
                                            GlobalVars.charcoal_hq.send_message("[CI]({ci_link}) on {commit_sha} succeeded.".format(ci_link=c["ci_url"], commit_sha=sha))
                                    elif c["status"] == "failure":
                                        GlobalVars.charcoal_hq.send_message("[CI]({ci_link}) on {commit_sha} failed.".format(ci_link=c["ci_url"], commit_sha=sha))
                except Exception, e:
                    GlobalVars.metasmoke_ws = websocket.create_connection(GlobalVars.metasmoke_ws_host, origin=GlobalVars.metasmoke_host)
                    GlobalVars.metasmoke_ws.send(json.dumps({"command": "subscribe", "identifier": "{\"channel\":\"SmokeDetectorChannel\"}"}))
                    print e
                    try:
                        exc_info = sys.exc_info()
                        traceback.print_exception(*exc_info)
                    except:
                        print "meh"
        except:
            print "Couldn't bind to MS websocket"
Exemple #24
0
    def handle_websocket_data(data):
        if "message" not in data:
            return

        message = data['message']
        if isinstance(message, Iterable):
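            # Dispatch on whichever key is present in the metasmoke message.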
            if "message" in message:
                GlobalVars.charcoal_hq.send_message(message['message'])
            elif "exit" in message:
                os._exit(message["exit"])
            elif "blacklist" in message:
                datahandling.add_blacklisted_user(
                    (message['blacklist']['uid'],
                     message['blacklist']['site']), "metasmoke",
                    message['blacklist']['post'])
            elif "naa" in message:
                post_site_id = parsing.fetch_post_id_and_site_from_url(
                    message["naa"]["post_link"])
                datahandling.add_ignored_post(post_site_id[0:2])
            elif "fp" in message:
                post_site_id = parsing.fetch_post_id_and_site_from_url(
                    message["fp"]["post_link"])
                datahandling.add_false_positive(post_site_id[0:2])
            elif "report" in message:
                post_data = apigetpost.api_get_post(
                    message["report"]["post_link"])
                if post_data is None or post_data is False:
                    return
                if datahandling.has_already_been_posted(post_data.site, post_data.post_id, post_data.title) \
                        and not datahandling.is_false_positive((post_data.post_id, post_data.site)):
                    return
                user = parsing.get_user_from_url(post_data.owner_url)
                if user is not None:
                    datahandling.add_blacklisted_user(user, "metasmoke",
                                                      post_data.post_url)
                why = u"Post manually reported by user *{}* from metasmoke.\n".format(
                    message["report"]["user"])
                postobj = classes.Post(
                    api_response={
                        'title': post_data.title,
                        'body': post_data.body,
                        'owner': {
                            'display_name': post_data.owner_name,
                            'reputation': post_data.owner_rep,
                            'link': post_data.owner_url
                        },
                        'site': post_data.site,
                        'IsAnswer': (post_data.post_type == "answer"),
                        'score': post_data.score,
                        'link': post_data.post_url,
                        'question_id': post_data.post_id,
                        'up_vote_count': post_data.up_vote_count,
                        'down_vote_count': post_data.down_vote_count
                    })
                spamhandling.handle_spam(
                    post=postobj,
                    reasons=["Manually reported " + post_data.post_type],
                    why=why)
            elif "deploy_updated" in message:
                sha = message["deploy_updated"]["head_commit"]["id"]
                if sha != os.popen('git log --pretty=format:"%H" -n 1').read():
                    if "autopull" in message["deploy_updated"]["head_commit"][
                            "message"]:
                        if only_blacklists_changed(
                                GitManager.get_remote_diff()):
                            commit_md = "[`{0}`](https://github.com/Charcoal-SE/SmokeDetector/commit/{0})" \
                                        .format(sha[:7])
                            i = []  # Currently no issues with blacklists
                            for bl_file in glob('bad_*.txt') + glob(
                                    'blacklisted_*.txt'
                            ):  # Check blacklists for issues
                                with open(bl_file, 'r') as lines:
                                    seen = dict()
                                    for lineno, line in enumerate(lines, 1):
                                        if line.endswith('\r\n'):
                                            i.append(
                                                "DOS line ending at `{0}:{1}` in {2}"
                                                .format(
                                                    bl_file, lineno,
                                                    commit_md))
                                        if not line.endswith('\n'):
                                            i.append(
                                                "No newline at end of `{0}` in {1}"
                                                .format(bl_file, commit_md))
                                        if line == '\n':
                                            i.append(
                                                "Blank line at `{0}:{1}` in {2}"
                                                .format(
                                                    bl_file, lineno,
                                                    commit_md))
                                        if line in seen:
                                            i.append(
                                                "Duplicate entry of {0} at lines {1} and {2} of {3} in {4}"
                                                .format(
                                                    line.rstrip('\n'),
                                                    seen[line], lineno,
                                                    bl_file, commit_md))
                                        seen[line] = lineno
                            if i == []:  # No issues
                                GitManager.pull_remote()
                                load_blacklists()
                                GlobalVars.charcoal_hq.send_message(
                                    "No code modified in {0}, only blacklists"
                                    " reloaded.".format(commit_md))
                            else:
                                i.append("please fix before pulling.")
                                GlobalVars.charcoal_hq.send_message(
                                    ", ".join(i))
            elif "commit_status" in message:
                c = message["commit_status"]
                sha = c["commit_sha"][:7]
                if c["commit_sha"] != os.popen(
                        'git log --pretty=format:"%H" -n 1').read():
                    if c["status"] == "success":
                        if "autopull" in c["commit_message"]:
                            s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/Charcoal-SE/SmokeDetector/" \
                                "commit/{commit_sha})"\
                                " succeeded. Message contains 'autopull', pulling...".format(ci_link=c["ci_url"],
                                                                                             commit_sha=sha)
                            GlobalVars.charcoal_hq.send_message(s)
                            time.sleep(2)
                            os._exit(3)
                        else:
                            s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/Charcoal-SE/SmokeDetector/" \
                                "commit/{commit_sha}) succeeded.".format(ci_link=c["ci_url"], commit_sha=sha)
                    elif c["status"] == "failure":
                        s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/Charcoal-SE/SmokeDetector/" \
                            "commit/{commit_sha}) failed.".format(ci_link=c["ci_url"], commit_sha=sha)

                    # noinspection PyUnboundLocalVariable
                    GlobalVars.charcoal_hq.send_message(s)

            elif "everything_is_broken" in message:
                if message["everything_is_broken"] is True:
                    os._exit(6)
Exemple #25
0
    def make_api_call_for_site(self, site):
        posts = self.queue.pop(site)
        url = "http://api.stackexchange.com/2.2/questions/" + ";".join(str(x) for x in posts) + "?site=" + site + "&filter=!4y_-sca-)pfAwlmP_1FxC6e5yzutRIcQvonAiP&key=IAkbitmze4B8KpacUfLqkw(("
        # wait to make sure API has/updates post data
        time.sleep(60)
        try:
            response = requests.get(url, timeout=20).json()
        except requests.exceptions.Timeout:
            return  # could add some retrying logic here, but eh.

        if "quota_remaining" in response:
            GlobalVars.apiquota = response["quota_remaining"]
        else:
            GlobalVars.apiquota = 0
            return

        for post in response["items"]:
            if "title" not in post or "body" not in post:
                continue
            title = GlobalVars.parser.unescape(post["title"])
            body = GlobalVars.parser.unescape(post["body"])
            link = post["link"]
            try:
                owner_name = GlobalVars.parser.unescape(post["owner"]["display_name"])
                owner_link = post["owner"]["link"]
                owner_rep = post["owner"]["reputation"]
            except:
                owner_name = ""
                owner_link = ""
                owner_rep = 0
            q_id = str(post["question_id"])

            is_spam, reason = check_if_spam(title, body, owner_name, owner_link, site, q_id, False, False)
            if owner_rep <= 50 and is_spam:
                try:
                    handle_spam(title, owner_name, site, link, owner_link, q_id, reason, False)
                except:
                    print "NOP"

            classified, gibberish_score = classify_gibberish(body, site)
            if classified and gibberish_score >= 65:
                GlobalVars.bayesian_testroom.send_message(
                    "[ SmokeDetector | GibberishClassifierBeta ] "
                    "Potential gibberish body (%s%%): [%s](%s) on `%s`"
                    % (gibberish_score, title, link, site)
                )
            try:
                for answer in post["answers"]:
                    answer_title = ""
                    body = answer["body"]
                    print "got answer from owner with name " + owner_name
                    link = answer["link"]
                    a_id = str(answer["answer_id"])
                    try:
                        owner_name = GlobalVars.parser.unescape(answer["owner"]["display_name"])
                        owner_link = answer["owner"]["link"]
                        owner_rep = answer["owner"]["reputation"]
                    except:
                        owner_name = ""
                        owner_link = ""
                        owner_rep = 0

                    is_spam, reason = check_if_spam(answer_title, body, owner_name, owner_link, site, a_id, True, False)
                    if owner_rep <= 50 and is_spam:
                        try:
                            handle_spam(title, owner_name, site, link, owner_link, a_id, reason, True)
                        except:
                            print "NOP"

                    classified, gibberish_score = classify_gibberish(body, site)
                    if classified and gibberish_score >= 65:
                        GlobalVars.bayesian_testroom.send_message(
                            "[ SmokeDetector | GibberishClassifierBeta ] "
                            "Potential gibberish answer (%s%%): [%s](%s) on `%s`"
                            % (gibberish_score, title, link, site)
                        )
            except:
                print "no answers"
        return
Exemple #26
0
def handle_commands(content_lower, message_parts, ev_room, ev_room_name, ev_user_id, ev_user_name, wrap2, content, message_id):
    if content_lower.startswith("!!/parse") \
            and is_privileged(ev_room, ev_user_id, wrap2):
        string_to_parse = content[9:]
        print string_to_parse
        response = requests.get("http://*****:*****@" + ev_user_name.replace(" ", "") + "*"
    if content_lower.startswith("!!/tea"):
        return "*brews a cup of {choice} tea for @{user}*".format(choice=random.choice(['earl grey', 'green', 'chamomile', 'lemon', 'darjeeling', 'mint', 'jasmine']), user=ev_user_name.replace(" ", ""))
    if content_lower.startswith("!!/brownie"):
        return "Brown!"
    if content_lower.startswith("!!/hats"):
        wb_end = datetime(2016, 1, 4, 0, 0, 0)
        now = datetime.utcnow()
        if wb_end > now:
            diff = wb_end - now
            hours, remainder = divmod(diff.seconds, 3600)
            minutes, seconds = divmod(remainder, 60)
            daystr = "days" if diff.days != 1 else "day"
            hourstr = "hours" if hours != 1 else "hour"
            minutestr = "minutes" if minutes != 1 else "minute"
            secondstr = "seconds" if seconds != 1 else "second"
            return "HURRY UP AND EARN MORE HATS! Winterbash will be over in {} {}, {} {}, {} {}, and {} {}. :(".format(diff.days, daystr, hours, hourstr, minutes, minutestr, seconds, secondstr)

        return "Winterbash is over. :("
    if content_lower.startswith("!!/test"):
        string_to_test = content[8:]
        test_as_answer = False
        if content_lower.startswith("!!/test-a"):
            string_to_test = content[10:]
            test_as_answer = True
        if len(string_to_test) == 0:
            return "Nothing to test"
        result = "> "
        reasons, why = FindSpam.test_post(string_to_test, string_to_test, string_to_test, "", test_as_answer, False, 1, 0)
        if len(reasons) == 0:
            result += "Would not be caught for title, {}, and username.".format("answer" if test_as_answer else "body")
            return result
        result += ", ".join(reasons).capitalize()
        if why is not None and len(why) > 0:
            result += "\n----------\n"
            result += why
        return result
    if content_lower.startswith("!!/amiprivileged"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            return "Yes, you are a privileged user."

        return "No, you are not a privileged user."
    if content_lower.startswith("!!/notify"):
        if len(message_parts) != 3:
            return False, "2 arguments expected"
        user_id = int(ev_user_id)
        chat_site = wrap2.host
        room_id = message_parts[1]
        if not room_id.isdigit():
            return False, "Room ID is invalid."

        room_id = int(room_id)
        quiet_action = ("-" in message_parts[2])
        se_site = message_parts[2].replace('-', '')
        response, full_site = add_to_notification_list(user_id, chat_site, room_id, se_site)
        if response == 0:
            if quiet_action:
                return None

            return "You'll now get pings from me if I report a post on `{site_name}`, in room `{room_id}` on `chat.{chat_domain}`".format(site_name=se_site, room_id=room_id, chat_domain=chat_site)
        elif response == -1:
            return "That notification configuration is already registered."
        elif response == -2:
            return False, "The given SE site does not exist."
    if content_lower.startswith("!!/unnotify"):
        if len(message_parts) != 3:
            return False, "2 arguments expected"
        user_id = int(ev_user_id)
        chat_site = wrap2.host
        room_id = message_parts[1]
        if not room_id.isdigit():
            return False, "Room ID is invalid."

        room_id = int(room_id)
        quiet_action = ("-" in message_parts[2])
        se_site = message_parts[2].replace('-', '')
        response = remove_from_notification_list(user_id, chat_site, room_id, se_site)
        if response:
            if quiet_action:
                return None
            return "I will no longer ping you if I report a post on `{site_name}`, in room `{room_id}` on `chat.{chat_domain}`".format(site_name=se_site, room_id=room_id, chat_domain=chat_site)
        return "That configuration doesn't exist."
    if content_lower.startswith("!!/willibenotified"):
        if len(message_parts) != 3:
            return False, "2 arguments expected"
        user_id = int(ev_user_id)
        chat_site = wrap2.host
        room_id = message_parts[1]
        if not room_id.isdigit():
            return False, "Room ID is invalid"

        room_id = int(room_id)
        se_site = message_parts[2]
        will_be_notified = will_i_be_notified(user_id, chat_site, room_id, se_site)
        if will_be_notified:
            return "Yes, you will be notified for that site in that room."

        return "No, you won't be notified for that site in that room."
    if content_lower.startswith("!!/allnotificationsites"):
        if len(message_parts) != 2:
            return False, "1 argument expected"
        user_id = int(ev_user_id)
        chat_site = wrap2.host
        room_id = message_parts[1]
        if not room_id.isdigit():
            return False, "Room ID is invalid."
        sites = get_all_notification_sites(user_id, chat_site, room_id)
        if len(sites) == 0:
            return "You won't get notified for any sites in that room."

        return "You will get notified for these sites:\r\n" + ", ".join(sites)
    return False, None  # Unrecognized command, can be edited later.
Exemple #27
0
    def make_api_call_for_site(self, site):
        posts = self.queue.pop(site)
        if site == "stackoverflow.com":
            # Not all SO questions are shown in the realtime feed. We now
            # fetch all recently modified SO questions to work around that.
            min_query = ""
            if self.last_activity_date != 0:
                min_query = "&min=" + str(self.last_activity_date)
                pagesize = "50"
            else:
                pagesize = "25"
            url = (
                "http://api.stackexchange.com/2.2/questions?site=stackoverflow&filter=!4y_-sca-)pfAwlmP_1FxC6e5yzutRIcQvonAiP&key=IAkbitmze4B8KpacUfLqkw((&pagesize="
                + pagesize
                + min_query
            )
        else:
            url = (
                "http://api.stackexchange.com/2.2/questions/"
                + ";".join(str(x) for x in posts)
                + "?site="
                + site
                + "&filter=!4y_-sca-)pfAwlmP_1FxC6e5yzutRIcQvonAiP&key=IAkbitmze4B8KpacUfLqkw(("
            )
        # wait to make sure API has/updates post data
        time.sleep(60)
        try:
            response = requests.get(url, timeout=20).json()
        except requests.exceptions.Timeout:
            return  # could add some retrying logic here, but eh.

        if "quota_remaining" in response:
            GlobalVars.apiquota = response["quota_remaining"]
        else:
            GlobalVars.apiquota = 0
            return

        if site == "stackoverflow.com":
            if len(response["items"]) > 0 and "last_activity_date" in response["items"][0]:
                self.last_activity_date = response["items"][0]["last_activity_date"]

        for post in response["items"]:
            if "title" not in post or "body" not in post:
                continue
            title = GlobalVars.parser.unescape(post["title"])
            body = GlobalVars.parser.unescape(post["body"])
            link = post["link"]
            try:
                owner_name = GlobalVars.parser.unescape(post["owner"]["display_name"])
                owner_link = post["owner"]["link"]
                owner_rep = post["owner"]["reputation"]
            except:
                owner_name = ""
                owner_link = ""
                owner_rep = 0
            q_id = str(post["question_id"])

            if owner_rep <= 50:
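                # Spam checks are only run for authors with reputation <= 50.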
                is_spam, reason, why = check_if_spam(title, body, owner_name, owner_link, site, q_id, False, False)
                if is_spam:
                    try:
                        handle_spam(title, body, owner_name, site, link, owner_link, q_id, reason, False, why)
                    except:
                        print "NOP"

            classified, gibberish_score = classify_gibberish(body, site)
            if classified and gibberish_score >= 65:
                GlobalVars.bayesian_testroom.send_message(
                    "[ SmokeDetector | GibberishClassifierBeta ] "
                    u"Potential gibberish body ({}%): [{}]({}) on `{}`".format(gibberish_score, title, link, site)
                )
            try:
                for answer in post["answers"]:
                    answer_title = ""
                    body = answer["body"]
                    print "got answer from owner with name " + owner_name
                    link = answer["link"]
                    a_id = str(answer["answer_id"])
                    try:
                        owner_name = GlobalVars.parser.unescape(answer["owner"]["display_name"])
                        owner_link = answer["owner"]["link"]
                        owner_rep = answer["owner"]["reputation"]
                    except:
                        owner_name = ""
                        owner_link = ""
                        owner_rep = 0

                    if owner_rep <= 50:
                        is_spam, reason, why = check_if_spam(
                            answer_title, body, owner_name, owner_link, site, a_id, True, False
                        )
                        if is_spam:
                            try:
                                handle_spam(title, body, owner_name, site, link, owner_link, a_id, reason, True, why)
                            except:
                                print "NOP"

                    classified, gibberish_score = classify_gibberish(body, site)
                    if classified and gibberish_score >= 65:
                        GlobalVars.bayesian_testroom.send_message(
                            "[ SmokeDetector | GibberishClassifierBeta ] "
                            u"Potential gibberish answer ({}%): [{}]({}) on `{}`".format(
                                gibberish_score, title, link, site
                            )
                        )
            except:
                print "no answers"
        return
Exemple #28
0
    def make_api_call_for_site(self, site):
        if site not in self.queue:
            return

        self.queue_modify_lock.acquire()
        posts = self.queue.pop(site)
        store_bodyfetcher_queue()
        self.queue_modify_lock.release()

        question_modifier = ""
        pagesize_modifier = ""

        if site == "stackoverflow.com":
            # Not all SO questions are shown in the realtime feed. We now
            # fetch all recently modified SO questions to work around that.
            if self.last_activity_date != 0:
                pagesize = "50"
            else:
                pagesize = "25"

            pagesize_modifier = "&pagesize={pagesize}" \
                                "&min={time_length}".format(pagesize=pagesize, time_length=str(self.last_activity_date))
        else:
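            # For all other sites, request exactly the question IDs that were queued.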
            question_modifier = "/{0}".format(";".join(
                str(post) for post in posts))

        url = "https://api.stackexchange.com/2.2/questions{q_modifier}?site={site}" \
              "&filter=!)E0g*ODaEZ(SgULQhYvCYbu09*ss(bKFdnTrGmGUxnqPptuHP&key=IAkbitmze4B8KpacUfLqkw((" \
              "{optional_min_query_param}".format(q_modifier=question_modifier, site=site,
                                                  optional_min_query_param=pagesize_modifier)

        # wait to make sure API has/updates post data
        time.sleep(3)

        GlobalVars.api_request_lock.acquire()
        # Respect backoff, if we were given one
        if GlobalVars.api_backoff_time > time.time():
            time.sleep(GlobalVars.api_backoff_time - time.time() + 2)
        try:
            time_request_made = datetime.now().strftime('%H:%M:%S')
            response = requests.get(url, timeout=20).json()
        except (requests.exceptions.Timeout, requests.ConnectionError,
                Exception):
            # If the request fails (timeout or otherwise), put the posts back on
            # the queue so they are retried later.
            self.queue_modify_lock.acquire()
            if site in self.queue:
                self.queue[site].extend(posts)
            else:
                self.queue[site] = posts
            self.queue_modify_lock.release()
            GlobalVars.api_request_lock.release()
            return

        self.api_data_lock.acquire()
        add_or_update_api_data(site)
        self.api_data_lock.release()

        message_hq = ""
        if "quota_remaining" in response:
            if response[
                    "quota_remaining"] - GlobalVars.apiquota >= 5000 and GlobalVars.apiquota >= 0:
                GlobalVars.charcoal_hq.send_message(
                    "API quota rolled over with {0} requests remaining. "
                    "Current quota: {1}.".format(GlobalVars.apiquota,
                                                 response["quota_remaining"]))
                sorted_calls_per_site = sorted(
                    GlobalVars.api_calls_per_site.items(),
                    key=itemgetter(1),
                    reverse=True)
                api_quota_used_per_site = ""
                for site_name, quota_used in sorted_calls_per_site:
                    sanatized_site_name = site_name.replace(
                        '.com', '').replace('.stackexchange', '')
                    api_quota_used_per_site += sanatized_site_name + ": {0}\n".format(
                        str(quota_used))
                api_quota_used_per_site = api_quota_used_per_site.strip()
                GlobalVars.charcoal_hq.send_message(api_quota_used_per_site,
                                                    False)
                clear_api_data()
            if response["quota_remaining"] == 0:
                GlobalVars.charcoal_hq.send_message(
                    "API reports no quota left!  May be a glitch.")
                GlobalVars.charcoal_hq.send_message(
                    str(response))  # No code format for now?
            if GlobalVars.apiquota == -1:
                GlobalVars.charcoal_hq.send_message(
                    "Restart: API quota is {quota}.".format(
                        quota=response["quota_remaining"]))
            GlobalVars.apiquota = response["quota_remaining"]
        else:
            message_hq = "The quota_remaining property was not in the API response."

        if "error_message" in response:
            message_hq += " Error: {} at {} UTC.".format(
                response["error_message"], time_request_made)
            if "error_id" in response and response["error_id"] == 502:
                if GlobalVars.api_backoff_time < time.time(
                ) + 12:  # Add a backoff of 10 + 2 seconds as a default
                    GlobalVars.api_backoff_time = time.time() + 12
            message_hq += " Backing off on requests for the next 12 seconds."

        if "backoff" in response:
            if GlobalVars.api_backoff_time < time.time() + response["backoff"]:
                GlobalVars.api_backoff_time = time.time() + response["backoff"]

        GlobalVars.api_request_lock.release()

        if len(message_hq) > 0:
            GlobalVars.charcoal_hq.send_message(message_hq.strip())

        if "items" not in response:
            return

        if site == "stackoverflow.com":
            items = response["items"]
            if len(items) > 0 and "last_activity_date" in items[0]:
                self.last_activity_date = items[0]["last_activity_date"]

        num_scanned = 0
        start_time = time.time()

        for post in response["items"]:
            if "title" not in post or "body" not in post:
                continue

            num_scanned += 1

            title = GlobalVars.parser.unescape(post["title"])
            body = GlobalVars.parser.unescape(post["body"])
            link = post["link"]
            post_score = post["score"]
            up_vote_count = post["up_vote_count"]
            down_vote_count = post["down_vote_count"]
            try:
                owner_name = GlobalVars.parser.unescape(
                    post["owner"]["display_name"])
                owner_link = post["owner"]["link"]
                owner_rep = post["owner"]["reputation"]
            except:
                owner_name = ""
                owner_link = ""
                owner_rep = 0
            q_id = str(post["question_id"])

            is_spam, reason, why = check_if_spam(title=title,
                                                 body=body,
                                                 user_name=owner_name,
                                                 user_url=owner_link,
                                                 post_site=site,
                                                 post_id=q_id,
                                                 is_answer=False,
                                                 body_is_summary=False,
                                                 owner_rep=owner_rep,
                                                 post_score=post_score)
            if is_spam:
                try:
                    handle_spam(title=title,
                                body=body,
                                poster=owner_name,
                                site=site,
                                post_url=link,
                                poster_url=owner_link,
                                post_id=q_id,
                                reasons=reason,
                                is_answer=False,
                                why=why,
                                owner_rep=owner_rep,
                                post_score=post_score,
                                up_vote_count=up_vote_count,
                                down_vote_count=down_vote_count,
                                question_id=None)
                except:
                    print "NOP"
            try:
                for answer in post["answers"]:
                    num_scanned += 1
                    answer_title = ""
                    body = answer["body"]
                    print "got answer from owner with name " + owner_name
                    link = answer["link"]
                    a_id = str(answer["answer_id"])
                    post_score = answer["score"]
                    up_vote_count = answer["up_vote_count"]
                    down_vote_count = answer["down_vote_count"]
                    try:
                        owner_name = GlobalVars.parser.unescape(
                            answer["owner"]["display_name"])
                        owner_link = answer["owner"]["link"]
                        owner_rep = answer["owner"]["reputation"]
                    except:
                        owner_name = ""
                        owner_link = ""
                        owner_rep = 0

                    is_spam, reason, why = check_if_spam(title=answer_title,
                                                         body=body,
                                                         user_name=owner_name,
                                                         user_url=owner_link,
                                                         post_site=site,
                                                         post_id=a_id,
                                                         is_answer=True,
                                                         body_is_summary=False,
                                                         owner_rep=owner_rep,
                                                         post_score=post_score)
                    if is_spam:
                        try:
                            handle_spam(title=title,
                                        body=body,
                                        poster=owner_name,
                                        site=site,
                                        post_url=link,
                                        poster_url=owner_link,
                                        post_id=a_id,
                                        reasons=reason,
                                        is_answer=True,
                                        why=why,
                                        owner_rep=owner_rep,
                                        post_score=post_score,
                                        up_vote_count=up_vote_count,
                                        down_vote_count=down_vote_count,
                                        question_id=q_id)
                        except:
                            print "NOP"
            except:
                print "no answers"

        end_time = time.time()
        GlobalVars.posts_scan_stats_lock.acquire()
        GlobalVars.num_posts_scanned += num_scanned
        GlobalVars.post_scan_time += end_time - start_time
        GlobalVars.posts_scan_stats_lock.release()
        return
Exemple #29
0
    def init_websocket(self):
        try:
            GlobalVars.metasmoke_ws = websocket.create_connection(
                GlobalVars.metasmoke_ws_host, origin=GlobalVars.metasmoke_host)
            GlobalVars.metasmoke_ws.send(
                json.dumps({
                    "command":
                    "subscribe",
                    "identifier":
                    "{\"channel\":\"SmokeDetectorChannel\",\"key\":\"" +
                    GlobalVars.metasmoke_key + "\"}"
                }))

            while True:
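                # Block on the websocket and handle each metasmoke event as it arrives.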
                a = GlobalVars.metasmoke_ws.recv()
                print(a)
                try:
                    data = json.loads(a)
                    if "message" in data:
                        message = data['message']
                        if isinstance(message, Iterable):
                            if "message" in message:
                                GlobalVars.charcoal_hq.send_message(
                                    message['message'])
                            elif "blacklist" in message:
                                datahandling.add_blacklisted_user(
                                    (message['blacklist']['uid'],
                                     message['blacklist']['site']),
                                    "metasmoke", message['blacklist']['post'])
                            elif "naa" in message:
                                post_site_id = parsing.fetch_post_id_and_site_from_url(
                                    message["naa"]["post_link"])
                                datahandling.add_ignored_post(
                                    post_site_id[0:2])
                            elif "fp" in message:
                                post_site_id = parsing.fetch_post_id_and_site_from_url(
                                    message["fp"]["post_link"])
                                datahandling.add_false_positive(
                                    post_site_id[0:2])
                            elif "report" in message:
                                post_data = apigetpost.api_get_post(
                                    message["report"]["post_link"])
                                if post_data is None or post_data is False:
                                    continue
                                if datahandling.has_already_been_posted(
                                        post_data.site, post_data.post_id,
                                        post_data.title
                                ) and not datahandling.is_false_positive(
                                    (post_data.post_id, post_data.site)):
                                    continue
                                user = parsing.get_user_from_url(
                                    post_data.owner_url)
                                if user is not None:
                                    datahandling.add_blacklisted_user(
                                        user, "metasmoke", post_data.post_url)
                                why = u"Post manually reported by user *{}* from metasmoke.\n".format(
                                    message["report"]["user"])
                                spamhandling.handle_spam(
                                    title=post_data.title,
                                    body=post_data.body,
                                    poster=post_data.owner_name,
                                    site=post_data.site,
                                    post_url=post_data.post_url,
                                    poster_url=post_data.owner_url,
                                    post_id=post_data.post_id,
                                    reasons=[
                                        "Manually reported " +
                                        post_data.post_type
                                    ],
                                    is_answer=post_data.post_type == "answer",
                                    why=why,
                                    owner_rep=post_data.owner_rep,
                                    post_score=post_data.score,
                                    up_vote_count=post_data.up_vote_count,
                                    down_vote_count=post_data.down_vote_count,
                                    question_id=post_data.question_id)
                            elif "commit_status" in message:
                                c = message["commit_status"]
                                sha = c["commit_sha"][:7]
                                if c["commit_sha"] != os.popen(
                                        'git log --pretty=format:"%H" -n 1'
                                ).read():
                                    if c["status"] == "success":
                                        if "autopull" in c["commit_message"]:
                                            GlobalVars.charcoal_hq.send_message(
                                                "[CI]({ci_link}) on {commit_sha} succeeded. Message contains 'autopull', pulling..."
                                                .format(ci_link=c["ci_url"],
                                                        commit_sha=sha))
                                            time.sleep(2)
                                            os._exit(3)
                                        else:
                                            GlobalVars.charcoal_hq.send_message(
                                                "[CI]({ci_link}) on {commit_sha} succeeded."
                                                .format(ci_link=c["ci_url"],
                                                        commit_sha=sha))
                                    elif c["status"] == "failure":
                                        GlobalVars.charcoal_hq.send_message(
                                            "[CI]({ci_link}) on {commit_sha} failed."
                                            .format(ci_link=c["ci_url"],
                                                    commit_sha=sha))
                except Exception, e:
                    GlobalVars.metasmoke_ws = websocket.create_connection(
                        GlobalVars.metasmoke_ws_host,
                        origin=GlobalVars.metasmoke_host)
                    GlobalVars.metasmoke_ws.send(
                        json.dumps({
                            "command":
                            "subscribe",
                            "identifier":
                            "{\"channel\":\"SmokeDetectorChannel\"}"
                        }))
                    print e
                    try:
                        exc_info = sys.exc_info()
                        traceback.print_exception(*exc_info)
                    except:
                        print "meh"
        except:
            print "Couldn't bind to MS websocket"
Exemple #30
0
    def make_api_call_for_site(self, site):
        if site not in self.queue:
            return

        self.queue_modify_lock.acquire()
        new_posts = self.queue.pop(site)
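        # new_posts maps post ID -> the time that post was added to the queue.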
        store_bodyfetcher_queue()
        self.queue_modify_lock.release()

        new_post_ids = [int(k) for k in new_posts.keys()]

        if GlobalVars.flovis is not None:
            for post_id in new_post_ids:
                GlobalVars.flovis.stage('bodyfetcher/api_request', site, post_id,
                                        {'site': site, 'posts': list(new_posts.keys())})

        self.queue_timing_modify_lock.acquire()
        post_add_times = [v for k, v in new_posts.items()]
        pop_time = datetime.utcnow()

        for add_time in post_add_times:
            try:
                seconds_in_queue = (pop_time - add_time).total_seconds()
                if site in self.queue_timings:
                    self.queue_timings[site].append(seconds_in_queue)
                else:
                    self.queue_timings[site] = [seconds_in_queue]
            except KeyError:  # XXX: Any other possible exception?
                continue  # Skip to next item if we've got invalid data or missing values.

        store_queue_timings()

        self.queue_timing_modify_lock.release()
        self.max_ids_modify_lock.acquire()

        if site in self.previous_max_ids and max(new_post_ids) > self.previous_max_ids[site]:
            previous_max_id = self.previous_max_ids[site]
            intermediate_posts = range(previous_max_id + 1, max(new_post_ids))

            # We don't want to go over the 100-post API cutoff, so take the last
            # (100-len(new_post_ids)) from intermediate_posts

            intermediate_posts = intermediate_posts[-(100 - len(new_post_ids)):]

            # new_post_ids could contain edited posts, so merge it back in
            combined = chain(intermediate_posts, new_post_ids)

            # Could be duplicates, so uniquify
            posts = list(set(combined))
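            # e.g. previous_max_id=200 with new_post_ids=[205, 210] gives intermediate IDs
            # 201..209, which merge and de-duplicate into the IDs 201 through 210.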
        else:
            posts = new_post_ids

        try:
            if max(new_post_ids) > self.previous_max_ids[site]:
                self.previous_max_ids[site] = max(new_post_ids)
                store_bodyfetcher_max_ids()
        except KeyError:
            self.previous_max_ids[site] = max(new_post_ids)
            store_bodyfetcher_max_ids()

        self.max_ids_modify_lock.release()

        log('debug', "New IDs / Hybrid Intermediate IDs for {}:".format(site))
        if len(new_post_ids) > 30:
            log('debug', "{} +{} more".format(sorted(new_post_ids)[:30], len(new_post_ids) - 30))
        else:
            log('debug', sorted(new_post_ids))
        if len(new_post_ids) == len(posts):
            log('debug', "[ *Identical* ]")
        elif len(posts) > 30:
            log('debug', "{} +{} more".format(sorted(posts)[:30], len(posts) - 30))
        else:
            log('debug', sorted(posts))

        question_modifier = ""
        pagesize_modifier = ""

        if site == "stackoverflow.com":
            # Not all SO questions are shown in the realtime feed. We now
            # fetch all recently modified SO questions to work around that.
            if self.last_activity_date != 0:
                pagesize = "50"
            else:
                pagesize = "25"

            pagesize_modifier = "&pagesize={pagesize}" \
                                "&min={time_length}".format(pagesize=pagesize, time_length=str(self.last_activity_date))
        else:
            question_modifier = "/{0}".format(";".join([str(post) for post in posts]))

        url = "https://api.stackexchange.com/2.2/questions{q_modifier}?site={site}" \
              "&filter=!*xq08dCDNr)PlxxXfaN8ntivx(BPlY_8XASyXLX-J7F-)VK*Q3KTJVkvp*&key=IAkbitmze4B8KpacUfLqkw((" \
              "{optional_min_query_param}".format(q_modifier=question_modifier, site=site,
                                                  optional_min_query_param=pagesize_modifier)

        # wait to make sure API has/updates post data
        time.sleep(3)

        GlobalVars.api_request_lock.acquire()
        # Respect backoff, if we were given one
        if GlobalVars.api_backoff_time > time.time():
            time.sleep(GlobalVars.api_backoff_time - time.time() + 2)
        try:
            time_request_made = datetime.now().strftime('%H:%M:%S')
            response = requests.get(url, timeout=20).json()
        except (requests.exceptions.Timeout, requests.ConnectionError, Exception):
            # If the request fails (timeout or otherwise), put the posts back on
            # the queue so they are retried later.
            self.queue_modify_lock.acquire()
            if site in self.queue:
                self.queue[site].update(new_posts)
            else:
                self.queue[site] = new_posts
            self.queue_modify_lock.release()
            GlobalVars.api_request_lock.release()
            return

        self.api_data_lock.acquire()
        add_or_update_api_data(site)
        self.api_data_lock.release()

        message_hq = ""
        if "quota_remaining" in response:
            if response["quota_remaining"] - GlobalVars.apiquota >= 5000 and GlobalVars.apiquota >= 0:
                tell_rooms_with("debug", "API quota rolled over with {0} requests remaining. "
                                         "Current quota: {1}.".format(GlobalVars.apiquota,
                                                                      response["quota_remaining"]))

                sorted_calls_per_site = sorted(GlobalVars.api_calls_per_site.items(), key=itemgetter(1), reverse=True)
                api_quota_used_per_site = ""
                for site_name, quota_used in sorted_calls_per_site:
                    sanatized_site_name = site_name.replace('.com', '').replace('.stackexchange', '')
                    api_quota_used_per_site += sanatized_site_name + ": {0}\n".format(str(quota_used))
                api_quota_used_per_site = api_quota_used_per_site.strip()

                tell_rooms_with("debug", api_quota_used_per_site)
                clear_api_data()
            if response["quota_remaining"] == 0:
                tell_rooms_with("debug", "API reports no quota left!  May be a glitch.")
                tell_rooms_with("debug", str(response))  # No code format for now?
            if GlobalVars.apiquota == -1:
                tell_rooms_with("debug", "Restart: API quota is {quota}."
                                         .format(quota=response["quota_remaining"]))
            GlobalVars.apiquota = response["quota_remaining"]
        else:
            message_hq = "The quota_remaining property was not in the API response."

        if "error_message" in response:
            message_hq += " Error: {} at {} UTC.".format(response["error_message"], time_request_made)
            if "error_id" in response and response["error_id"] == 502:
                if GlobalVars.api_backoff_time < time.time() + 12:  # Add a backoff of 10 + 2 seconds as a default
                    GlobalVars.api_backoff_time = time.time() + 12
            message_hq += " Backing off on requests for the next 12 seconds."
            message_hq += " Previous URL: `{}`".format(url)

        if "backoff" in response:
            if GlobalVars.api_backoff_time < time.time() + response["backoff"]:
                GlobalVars.api_backoff_time = time.time() + response["backoff"]

        GlobalVars.api_request_lock.release()

        if len(message_hq) > 0:
            tell_rooms_with("debug", message_hq.strip())

        if "items" not in response:
            return

        if site == "stackoverflow.com":
            items = response["items"]
            if len(items) > 0 and "last_activity_date" in items[0]:
                self.last_activity_date = items[0]["last_activity_date"]

        num_scanned = 0
        start_time = time.time()

        for post in response["items"]:
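            # pnb is a trimmed copy of the post (body truncated, answers dropped), used only for flovis debug logging.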
            pnb = copy.deepcopy(post)
            if 'body' in pnb:
                pnb['body'] = 'Present, but truncated'
            if 'answers' in pnb:
                del pnb['answers']

            if "title" not in post or "body" not in post:
                if GlobalVars.flovis is not None and 'question_id' in post:
                    GlobalVars.flovis.stage('bodyfetcher/api_response/no_content', site, post['question_id'], pnb)
                continue

            post['site'] = site
            try:
                post['edited'] = (post['creation_date'] != post['last_edit_date'])
            except KeyError:
                post['edited'] = False  # last_edit_date not present = not edited

            try:
                post_ = Post(api_response=post)
            except PostParseError as err:
                log('error', 'Error {0} when parsing post: {1!r}'.format(err, pnb))
                if GlobalVars.flovis is not None and 'question_id' in post:
                    GlobalVars.flovis.stage('bodyfetcher/api_response/error', site, post['question_id'], pnb)
                continue

            num_scanned += 1

            is_spam, reason, why = check_if_spam(post_)

            if is_spam:
                try:
                    if GlobalVars.flovis is not None and 'question_id' in post:
                        GlobalVars.flovis.stage('bodyfetcher/api_response/spam', site, post['question_id'],
                                                {'post': pnb, 'check_if_spam': [is_spam, reason, why]})
                    handle_spam(post=post_,
                                reasons=reason,
                                why=why)
                except Exception as e:
                    log('error', "Exception in handle_spam:", e)
            elif GlobalVars.flovis is not None and 'question_id' in post:
                GlobalVars.flovis.stage('bodyfetcher/api_response/not_spam', site, post['question_id'],
                                        {'post': pnb, 'check_if_spam': [is_spam, reason, why]})

            try:
                if "answers" not in post:
                    pass
                else:
                    for answer in post["answers"]:
                        anb = copy.deepcopy(answer)
                        if 'body' in anb:
                            anb['body'] = 'Present, but truncated'

                        num_scanned += 1
                        answer["IsAnswer"] = True  # Necesssary for Post object
                        answer["title"] = ""  # Necessary for proper Post object creation
                        answer["site"] = site  # Necessary for proper Post object creation
                        try:
                            answer['edited'] = (answer['creation_date'] != answer['last_edit_date'])
                        except KeyError:
                            answer['edited'] = False  # last_edit_date not present = not edited
                        answer_ = Post(api_response=answer, parent=post_)

                        is_spam, reason, why = check_if_spam(answer_)
                        if is_spam:
                            try:
                                if GlobalVars.flovis is not None and 'answer_id' in answer:
                                    GlobalVars.flovis.stage('bodyfetcher/api_response/spam', site, answer['answer_id'],
                                                            {'post': anb, 'check_if_spam': [is_spam, reason, why]})
                                handle_spam(answer_,
                                            reasons=reason,
                                            why=why)
                            except Exception as e:
                                log('error', "Exception in handle_spam:", e)
                        elif GlobalVars.flovis is not None and 'answer_id' in answer:
                            GlobalVars.flovis.stage('bodyfetcher/api_response/not_spam', site, answer['answer_id'],
                                                    {'post': anb, 'check_if_spam': [is_spam, reason, why]})

            except Exception as e:
                log('error', "Exception handling answers:", e)

        end_time = time.time()
        GlobalVars.posts_scan_stats_lock.acquire()
        GlobalVars.num_posts_scanned += num_scanned
        GlobalVars.post_scan_time += end_time - start_time
        GlobalVars.posts_scan_stats_lock.release()
        return
Exemple #31
0
    def make_api_call_for_site(self, site):
        if site not in self.queue:
            return

        self.queue_modify_lock.acquire()
        posts = self.queue.pop(site)
        store_bodyfetcher_queue()
        self.queue_modify_lock.release()

        question_modifier = ""
        pagesize_modifier = ""

        if site == "stackoverflow.com":
            # Not all SO questions are shown in the realtime feed. We now
            # fetch all recently modified SO questions to work around that.
            if self.last_activity_date != 0:
                pagesize = "50"
            else:
                pagesize = "25"

            pagesize_modifier = "&pagesize={pagesize}" \
                                "&min={time_length}".format(pagesize=pagesize, time_length=str(self.last_activity_date))
        else:
            question_modifier = "/{0}".format(";".join(str(post) for post in posts))

        url = "https://api.stackexchange.com/2.2/questions{q_modifier}?site={site}" \
              "&filter=!)E0g*ODaEZ(SgULQhYvCYbu09*ss(bKFdnTrGmGUxnqPptuHP&key=IAkbitmze4B8KpacUfLqkw((" \
              "{optional_min_query_param}".format(q_modifier=question_modifier, site=site,
                                                  optional_min_query_param=pagesize_modifier)

        # wait to make sure API has/updates post data
        time.sleep(3)

        GlobalVars.api_request_lock.acquire()
        # Respect backoff, if we were given one
        if GlobalVars.api_backoff_time > time.time():
            time.sleep(GlobalVars.api_backoff_time - time.time() + 2)
        try:
            time_request_made = datetime.now().strftime('%H:%M:%S')
            response = requests.get(url, timeout=20).json()
        except (requests.exceptions.Timeout, requests.ConnectionError, Exception):
            # If the request fails (timeout or otherwise), put the posts back on
            # the queue so they are retried later.
            self.queue_modify_lock.acquire()
            if site in self.queue:
                self.queue[site].extend(posts)
            else:
                self.queue[site] = posts
            self.queue_modify_lock.release()
            GlobalVars.api_request_lock.release()
            return

        self.api_data_lock.acquire()
        add_or_update_api_data(site)
        self.api_data_lock.release()

        message_hq = ""
        if "quota_remaining" in response:
            if response["quota_remaining"] - GlobalVars.apiquota >= 5000 and GlobalVars.apiquota >= 0:
                GlobalVars.charcoal_hq.send_message("API quota rolled over with {0} requests remaining. "
                                                    "Current quota: {1}.".format(GlobalVars.apiquota,
                                                                                 response["quota_remaining"]))
                sorted_calls_per_site = sorted(GlobalVars.api_calls_per_site.items(), key=itemgetter(1), reverse=True)
                api_quota_used_per_site = ""
                for site_name, quota_used in sorted_calls_per_site:
                    sanatized_site_name = site_name.replace('.com', '').replace('.stackexchange', '')
                    api_quota_used_per_site += sanatized_site_name + ": {0}\n".format(str(quota_used))
                api_quota_used_per_site = api_quota_used_per_site.strip()
                GlobalVars.charcoal_hq.send_message(api_quota_used_per_site, False)
                clear_api_data()
            if response["quota_remaining"] == 0:
                GlobalVars.charcoal_hq.send_message("API reports no quota left!  May be a glitch.")
                GlobalVars.charcoal_hq.send_message(str(response))  # No code format for now?
            if GlobalVars.apiquota == -1:
                GlobalVars.charcoal_hq.send_message("Restart: API quota is {quota}."
                                                    .format(quota=response["quota_remaining"]))
            GlobalVars.apiquota = response["quota_remaining"]
        else:
            message_hq = "The quota_remaining property was not in the API response."

        if "error_message" in response:
            message_hq += " Error: {} at {} UTC.".format(response["error_message"], time_request_made)
            if "error_id" in response and response["error_id"] == 502:
                if GlobalVars.api_backoff_time < time.time() + 12:  # Add a backoff of 10 + 2 seconds as a default
                    GlobalVars.api_backoff_time = time.time() + 12
            message_hq += " Backing off on requests for the next 12 seconds."

        if "backoff" in response:
            if GlobalVars.api_backoff_time < time.time() + response["backoff"]:
                GlobalVars.api_backoff_time = time.time() + response["backoff"]

        GlobalVars.api_request_lock.release()

        if len(message_hq) > 0:
            GlobalVars.charcoal_hq.send_message(message_hq.strip())

        if "items" not in response:
            return

        if site == "stackoverflow.com":
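            # Track the newest activity timestamp so the next stackoverflow.com fetch can request only newer questions.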
            items = response["items"]
            if len(items) > 0 and "last_activity_date" in items[0]:
                self.last_activity_date = items[0]["last_activity_date"]

        num_scanned = 0
        start_time = time.time()

        for post in response["items"]:
            if "title" not in post or "body" not in post:
                continue

            num_scanned += 1

            title = GlobalVars.parser.unescape(post["title"])
            body = GlobalVars.parser.unescape(post["body"])
            link = post["link"]
            post_score = post["score"]
            up_vote_count = post["up_vote_count"]
            down_vote_count = post["down_vote_count"]
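            # The owner block may be missing (e.g. for deleted users); fall back to blank values.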
            try:
                owner_name = GlobalVars.parser.unescape(post["owner"]["display_name"])
                owner_link = post["owner"]["link"]
                owner_rep = post["owner"]["reputation"]
            except:
                owner_name = ""
                owner_link = ""
                owner_rep = 0
            q_id = str(post["question_id"])

            is_spam, reason, why = check_if_spam(title=title,
                                                 body=body,
                                                 user_name=owner_name,
                                                 user_url=owner_link,
                                                 post_site=site,
                                                 post_id=q_id,
                                                 is_answer=False,
                                                 body_is_summary=False,
                                                 owner_rep=owner_rep,
                                                 post_score=post_score)
            if is_spam:
                try:
                    handle_spam(title=title,
                                body=body,
                                poster=owner_name,
                                site=site,
                                post_url=link,
                                poster_url=owner_link,
                                post_id=q_id,
                                reasons=reason,
                                is_answer=False,
                                why=why,
                                owner_rep=owner_rep,
                                post_score=post_score,
                                up_vote_count=up_vote_count,
                                down_vote_count=down_vote_count,
                                question_id=None)
                except:
                    print "NOP"
            try:
                for answer in post["answers"]:
                    num_scanned += 1
                    answer_title = ""
                    body = answer["body"]
                    print "got answer from owner with name " + owner_name
                    link = answer["link"]
                    a_id = str(answer["answer_id"])
                    post_score = answer["score"]
                    up_vote_count = answer["up_vote_count"]
                    down_vote_count = answer["down_vote_count"]
                    try:
                        owner_name = GlobalVars.parser.unescape(answer["owner"]["display_name"])
                        owner_link = answer["owner"]["link"]
                        owner_rep = answer["owner"]["reputation"]
                    except:
                        owner_name = ""
                        owner_link = ""
                        owner_rep = 0

                    is_spam, reason, why = check_if_spam(title=answer_title,
                                                         body=body,
                                                         user_name=owner_name,
                                                         user_url=owner_link,
                                                         post_site=site,
                                                         post_id=a_id,
                                                         is_answer=True,
                                                         body_is_summary=False,
                                                         owner_rep=owner_rep,
                                                         post_score=post_score)
                    if is_spam:
                        try:
                            handle_spam(title=title,
                                        body=body,
                                        poster=owner_name,
                                        site=site,
                                        post_url=link,
                                        poster_url=owner_link,
                                        post_id=a_id,
                                        reasons=reason,
                                        is_answer=True,
                                        why=why,
                                        owner_rep=owner_rep,
                                        post_score=post_score,
                                        up_vote_count=up_vote_count,
                                        down_vote_count=down_vote_count,
                                        question_id=q_id)
                        except:
                            print "NOP"
            except:
                print "no answers"

        end_time = time.time()
        GlobalVars.posts_scan_stats_lock.acquire()
        GlobalVars.num_posts_scanned += num_scanned
        GlobalVars.post_scan_time += end_time - start_time
        GlobalVars.posts_scan_stats_lock.release()
        return
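
The method above throttles its Stack Exchange API usage in two places: it sleeps past any previously announced backoff deadline before issuing a request, and it records a new deadline whenever a response carries a "backoff" field. A minimal standalone sketch of that pattern (hypothetical name backoff_aware_get; not part of SmokeDetector) might look like this:

import time

import requests

_backoff_until = 0  # shared backoff deadline; a real implementation would guard it with a lock


def backoff_aware_get(url, timeout=20):
    """Issue a GET against the Stack Exchange API while honouring its backoff hints."""
    global _backoff_until
    now = time.time()
    if _backoff_until > now:
        # Sleep out the remaining backoff, plus a little slack, before requesting again.
        time.sleep(_backoff_until - now + 2)
    response = requests.get(url, timeout=timeout).json()
    if "backoff" in response:
        # Remember the new deadline so the next call waits long enough.
        _backoff_until = max(_backoff_until, time.time() + response["backoff"])
    return response
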
    def make_api_call_for_site(self, site):
        posts = self.queue.pop(site)

        self.queue_store_lock.acquire()
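        # Persist the current fetch queue (store_bodyfetcher_queue presumably serialises it) so its state survives a restart.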
        store_bodyfetcher_queue()
        self.queue_store_lock.release()

        if site == "stackoverflow.com":
            # Not all SO questions are shown in the realtime feed. We now
            # fetch all recently modified SO questions to work around that.
            min_query = ""
            if self.last_activity_date != 0:
                min_query = "&min=" + str(self.last_activity_date)
                pagesize = "50"
            else:
                pagesize = "25"
            url = "http://api.stackexchange.com/2.2/questions?site=stackoverflow" \
                  "&filter=!)E0g*ODaEZ(SgULQhYvCYbu09*ss(bKFdnTrGmGUxnqPptuHP" \
                  "&key=IAkbitmze4B8KpacUfLqkw((&pagesize=" + pagesize + min_query
        else:
            url = "http://api.stackexchange.com/2.2/questions/" + ";".join(str(x) for x in posts) + \
                  "?site=" + site + \
                  "&filter=!)E0g*ODaEZ(SgULQhYvCYbu09*ss(bKFdnTrGmGUxnqPptuHP&key=IAkbitmze4B8KpacUfLqkw(("

        # wait to make sure API has/updates post data
        time.sleep(3)
        # Respect backoff, if we were given one
        if GlobalVars.api_backoff_time > time.time():
            time.sleep(GlobalVars.api_backoff_time - time.time() + 2)
        try:
            response = requests.get(url, timeout=20).json()
        except requests.exceptions.Timeout:
            return  # no retrying logic; the posts popped above are simply dropped on a timeout

        self.api_data_lock.acquire()
        add_or_update_api_data(site)
        self.api_data_lock.release()

        message_hq = ""
        if "quota_remaining" in response:
            if response["quota_remaining"] - GlobalVars.apiquota >= 1000 and GlobalVars.apiquota >= 0:
                GlobalVars.charcoal_hq.send_message("API quota rolled over with {} requests remaining."
                                                    .format(GlobalVars.apiquota))
                sorted_calls_per_site = sorted(GlobalVars.api_calls_per_site.items(), key=itemgetter(1), reverse=True)
                api_quota_used_per_site = ""
                # Use a distinct loop variable: reusing `site` here would clobber the `site` argument used later on.
                for site_name, quota_used in sorted_calls_per_site:
                    sanitized_site_name = site_name.replace('.com', '').replace('.stackexchange', '')
                    api_quota_used_per_site += sanitized_site_name + ": " + str(quota_used) + "\n"
                api_quota_used_per_site = api_quota_used_per_site.strip()
                GlobalVars.charcoal_hq.send_message(api_quota_used_per_site, False)
                clear_api_data()
            if response["quota_remaining"] == 0:
                GlobalVars.charcoal_hq.send_message("API reports no quota left!  May be a glitch.")
                GlobalVars.charcoal_hq.send_message(str(response))  # No code format for now?
            if GlobalVars.apiquota == -1:
                GlobalVars.charcoal_hq.send_message("Restart: API quota is {}.".format(response["quota_remaining"]))
            GlobalVars.apiquota = response["quota_remaining"]
        else:
            message_hq = "The quota_remaining property was not in the API response."

        if "error_message" in response:
            message_hq = message_hq + " Error: {}.".format(response["error_message"])

        if "backoff" in response:
            if GlobalVars.api_backoff_time < time.time() + response["backoff"]:
                GlobalVars.api_backoff_time = time.time() + response["backoff"]
            message_hq = message_hq + "\n" + "Backoff received of " + str(response["backoff"]) + " seconds."

        if len(message_hq) > 0:
            GlobalVars.charcoal_hq.send_message(message_hq.strip())

        if "items" not in response:
            return

        if site == "stackoverflow.com":
            if len(response["items"]) > 0 and "last_activity_date" in response["items"][0]:
                self.last_activity_date = response["items"][0]["last_activity_date"]

        for post in response["items"]:
            if "title" not in post or "body" not in post:
                continue
            title = GlobalVars.parser.unescape(post["title"])
            body = GlobalVars.parser.unescape(post["body"])
            link = post["link"]
            post_score = post["score"]
            up_vote_count = post["up_vote_count"]
            down_vote_count = post["down_vote_count"]
            try:
                owner_name = GlobalVars.parser.unescape(post["owner"]["display_name"])
                owner_link = post["owner"]["link"]
                owner_rep = post["owner"]["reputation"]
            except:
                owner_name = ""
                owner_link = ""
                owner_rep = 0
            q_id = str(post["question_id"])

            is_spam, reason, why = check_if_spam(title, body, owner_name, owner_link, site,
                                                 q_id, False, False, owner_rep, post_score)
            if is_spam:
                try:
                    handle_spam(title, body, owner_name, site, link, owner_link, q_id, reason, False,
                                why, owner_rep, post_score, up_vote_count, down_vote_count)
                except:
                    print "NOP"
            try:
                for answer in post["answers"]:
                    answer_title = ""
                    body = answer["body"]
                    print "got answer from owner with name " + owner_name
                    link = answer["link"]
                    a_id = str(answer["answer_id"])
                    post_score = answer["score"]
                    up_vote_count = answer["up_vote_count"]
                    down_vote_count = answer["down_vote_count"]
                    try:
                        owner_name = GlobalVars.parser.unescape(answer["owner"]["display_name"])
                        owner_link = answer["owner"]["link"]
                        owner_rep = answer["owner"]["reputation"]
                    except:
                        owner_name = ""
                        owner_link = ""
                        owner_rep = 0

                    is_spam, reason, why = check_if_spam(answer_title, body, owner_name, owner_link, site,
                                                         a_id, True, False, owner_rep, post_score)
                    if is_spam:
                        try:
                            handle_spam(title, body, owner_name, site, link, owner_link, a_id, reason, True,
                                        why, owner_rep, post_score, up_vote_count, down_vote_count)
                        except:
                            print "NOP"
            except:
                print "no answers"
        return
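
Both versions above special-case stackoverflow.com because the realtime feed does not surface every question; instead they poll /2.2/questions with min= set to the newest last_activity_date seen so far. A simplified sketch of that incremental fetch (hypothetical function name; the real filter and API key are omitted and the default activity sort is made explicit) might look like this:

import requests

API_URL = "http://api.stackexchange.com/2.2/questions"


def fetch_recent_so_questions(last_activity_date):
    """Fetch Stack Overflow questions modified since the given activity timestamp."""
    params = {"site": "stackoverflow", "order": "desc", "sort": "activity"}
    if last_activity_date:
        # Only ask for questions whose activity is at or after the last timestamp we saw.
        params["min"] = last_activity_date
        params["pagesize"] = 50
    else:
        # First call: no lower bound yet, so fetch a smaller page of the most recent activity.
        params["pagesize"] = 25
    items = requests.get(API_URL, params=params, timeout=20).json().get("items", [])
    if items and "last_activity_date" in items[0]:
        # The first item is the most recently active one; remember it for the next call.
        last_activity_date = items[0]["last_activity_date"]
    return items, last_activity_date
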
    @staticmethod
    def handle_websocket_data(data):
        if "message" not in data:
            return

        message = data["message"]
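        # metasmoke pushes several kinds of events over this websocket; dispatch on whichever key the message carries.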
        if isinstance(message, Iterable):
            if "message" in message:
                GlobalVars.charcoal_hq.send_message(message["message"])
            elif "blacklist" in message:
                datahandling.add_blacklisted_user(
                    (message["blacklist"]["uid"], message["blacklist"]["site"]),
                    "metasmoke",
                    message["blacklist"]["post"],
                )
            elif "naa" in message:
                post_site_id = parsing.fetch_post_id_and_site_from_url(message["naa"]["post_link"])
                datahandling.add_ignored_post(post_site_id[0:2])
            elif "fp" in message:
                post_site_id = parsing.fetch_post_id_and_site_from_url(message["fp"]["post_link"])
                datahandling.add_false_positive(post_site_id[0:2])
            elif "report" in message:
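                # A post was manually reported from metasmoke: fetch it, skip it if it was already reported
                # and is not a known false positive, blacklist its author, then report it as spam.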
                post_data = apigetpost.api_get_post(message["report"]["post_link"])
                if post_data is None or post_data is False:
                    return
                if datahandling.has_already_been_posted(
                    post_data.site, post_data.post_id, post_data.title
                ) and not datahandling.is_false_positive((post_data.post_id, post_data.site)):
                    return
                user = parsing.get_user_from_url(post_data.owner_url)
                if user is not None:
                    datahandling.add_blacklisted_user(user, "metasmoke", post_data.post_url)
                why = u"Post manually reported by user *{}* from metasmoke.\n".format(message["report"]["user"])
                spamhandling.handle_spam(
                    title=post_data.title,
                    body=post_data.body,
                    poster=post_data.owner_name,
                    site=post_data.site,
                    post_url=post_data.post_url,
                    poster_url=post_data.owner_url,
                    post_id=post_data.post_id,
                    reasons=["Manually reported " + post_data.post_type],
                    is_answer=post_data.post_type == "answer",
                    why=why,
                    owner_rep=post_data.owner_rep,
                    post_score=post_data.score,
                    up_vote_count=post_data.up_vote_count,
                    down_vote_count=post_data.down_vote_count,
                    question_id=post_data.question_id,
                )
            elif "commit_status" in message:
                c = message["commit_status"]
                sha = c["commit_sha"][:7]
                if c["commit_sha"] != os.popen('git log --pretty=format:"%H" -n 1').read():
                    if c["status"] == "success":
                        if "autopull" in c["commit_message"]:
                            s = "[CI]({ci_link}) on {commit_sha} succeeded. Message contains 'autopull', pulling...".format(
                                ci_link=c["ci_url"], commit_sha=sha
                            )
                            GlobalVars.charcoal_hq.send_message(s)
                            time.sleep(2)
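                            # Exit so an external supervisor can pull the new commit and restart the bot (exit code 3 is assumed to signal this).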
                            os._exit(3)
                        else:
                            s = "[CI]({ci_link}) on {commit_sha} succeeded.".format(ci_link=c["ci_url"], commit_sha=sha)
                    elif c["status"] == "failure":
                        s = "[CI]({ci_link}) on {commit_sha} failed.".format(ci_link=c["ci_url"], commit_sha=sha)
                    else:
                        return  # unknown CI status; nothing to report

                    GlobalVars.charcoal_hq.send_message(s)