Example #1
0
def init():
    """Inits the bot.

    Logs into Reddit, scans each configured subreddit's newest submissions,
    and for links from whitelisted domains extracts the article, summarizes
    it, and replies with the summary plus a word cloud. Every inspected
    submission id is persisted via update_log() so it is handled only once.
    """

    reddit = praw.Reddit(client_id=config.APP_ID, client_secret=config.APP_SECRET,
                         user_agent=config.USER_AGENT, username=config.REDDIT_USERNAME,
                         password=config.REDDIT_PASSWORD)

    processed_posts = load_log()
    whitelist = load_whitelist()

    for subreddit in config.SUBREDDITS:

        for submission in reddit.subreddit(subreddit).new():

            # Guard clauses keep the happy path flat.
            if submission.id in processed_posts:
                continue

            # Strip the "amp." prefix so we fetch the canonical page.
            # NOTE(review): a plain replace also touches "amp." anywhere
            # else in the URL — confirm this is acceptable for the
            # whitelisted domains.
            clean_url = submission.url.replace("amp.", "")
            ext = tldextract.extract(clean_url)
            domain = "{}.{}".format(ext.domain, ext.suffix)

            if domain not in whitelist:
                continue

            try:
                article, title = extract_article_from_url(clean_url)
                summary_dict = summary.get_summary(article, title)
            except Exception as e:
                # Log the failure and mark the post as processed so we
                # don't retry a permanently broken URL forever.
                log_error("{},{}".format(clean_url, e))
                update_log(submission.id)
                print("Failed:", submission.id)
                continue

            # To reduce low quality submissions, we only process those that made a meaningful summary.
            if MINIMUM_REDUCTION_THRESHOLD <= summary_dict["reduction"] <= MAXIMUM_REDUCTION_THRESHOLD:

                # Create a wordcloud, upload it to Imgur and get back the url.
                image_url = cloud.generate_word_cloud(
                    summary_dict["article_words"])

                # Quote each top sentence as a Markdown blockquote.
                post_body = "".join(
                    "> {}\n\n".format(sentence)
                    for sentence in summary_dict["top_sentences"])

                # Build the "word^#1 word^#2 ..." ranking for the footer.
                top_words = "".join(
                    "{}^#{} ".format(word, rank)
                    for rank, word in enumerate(summary_dict["top_words"], start=1))

                post_message = HEADER.format(
                    summary_dict["title"], submission.url, summary_dict["reduction"]) + post_body + FOOTER.format(image_url, top_words)

                # We already hold the Submission object — reply to it
                # directly instead of re-fetching via reddit.submission().
                submission.reply(post_message)
                update_log(submission.id)
                print("Replied to:", submission.id)
            else:
                update_log(submission.id)
                print("Skipped:", submission.id)
Example #2
0
def init():
    """Inits the bot.

    Logs into Reddit, scans up to the 50 newest submissions of each
    configured subreddit, and for links from whitelisted domains downloads
    the page, scrapes the article, summarizes it, and replies with the
    summary plus a word cloud. Every inspected submission id is persisted
    via update_log() so it is handled only once.
    """

    reddit = praw.Reddit(client_id=config.APP_ID, client_secret=config.APP_SECRET,
                         user_agent=config.USER_AGENT, username=config.REDDIT_USERNAME,
                         password=config.REDDIT_PASSWORD)

    processed_posts = load_log()
    whitelist = load_whitelist()

    for subreddit in config.SUBREDDITS:

        for submission in reddit.subreddit(subreddit).new(limit=50):

            # Guard clauses keep the happy path flat.
            if submission.id in processed_posts:
                continue

            # Strip the "amp." prefix so we fetch the canonical page.
            # NOTE(review): a plain replace also touches "amp." anywhere
            # else in the URL — confirm this is acceptable for the
            # whitelisted domains.
            clean_url = submission.url.replace("amp.", "")
            ext = tldextract.extract(clean_url)
            domain = "{}.{}".format(ext.domain, ext.suffix)

            if domain not in whitelist:
                continue

            try:
                with requests.get(clean_url, headers=HEADERS, timeout=10) as response:

                    # Sometimes Requests makes an incorrect guess, we force it to use utf-8
                    if response.encoding == "ISO-8859-1":
                        response.encoding = "utf-8"

                    html_source = response.text

                article_title, article_date, article_body = scraper.scrape_html(
                    html_source)

                summary_dict = summary.get_summary(article_body)
            except Exception as e:
                # Log the failure and mark the post as processed so we
                # don't retry a permanently broken URL forever.
                log_error("{},{}".format(clean_url, e))
                update_log(submission.id)
                print("Failed:", submission.id)
                continue

            # To reduce low quality submissions, we only process those that made a meaningful summary.
            if MINIMUM_REDUCTION_THRESHOLD <= summary_dict["reduction"] <= MAXIMUM_REDUCTION_THRESHOLD:

                # Create a wordcloud, upload it to Imgur and get back the url.
                image_url = cloud.generate_word_cloud(
                    summary_dict["article_words"])

                # Quote each top sentence as a Markdown blockquote.
                post_body = "\n\n".join(
                    "> " + item for item in summary_dict["top_sentences"])

                # Build the "word^#1 word^#2 ..." ranking for the footer.
                top_words = "".join(
                    "{}^#{} ".format(word, rank)
                    for rank, word in enumerate(summary_dict["top_words"], start=1))

                post_message = TEMPLATE.format(
                    article_title, clean_url, summary_dict["reduction"], article_date, post_body, image_url, top_words)

                # We already hold the Submission object — reply to it
                # directly instead of re-fetching via reddit.submission().
                submission.reply(post_message)
                update_log(submission.id)
                print("Replied to:", submission.id)
            else:
                update_log(submission.id)
                print("Skipped:", submission.id)