def init():
    """Inits the bot."""

    reddit = praw.Reddit(client_id=config.APP_ID,
                         client_secret=config.APP_SECRET,
                         user_agent=config.USER_AGENT,
                         username=config.REDDIT_USERNAME,
                         password=config.REDDIT_PASSWORD)

    processed_posts = load_log()
    whitelist = load_whitelist()

    for subreddit in config.SUBREDDITS:

        for submission in reddit.subreddit(subreddit).new():

            if submission.id not in processed_posts:

                # Strip 'amp.' subdomains so we work with the canonical article URL.
                clean_url = submission.url.replace("amp.", "")
                ext = tldextract.extract(clean_url)
                domain = "{}.{}".format(ext.domain, ext.suffix)

                if domain in whitelist:

                    try:
                        article, title = extract_article_from_url(clean_url)
                        summary_dict = summary.get_summary(article, title)
                    except Exception as e:
                        log_error("{},{}".format(clean_url, e))
                        update_log(submission.id)
                        print("Failed:", submission.id)
                        continue

                    # To avoid low-quality replies, we only process submissions whose summary achieved a meaningful reduction.
                    if MINIMUM_REDUCTION_THRESHOLD <= summary_dict["reduction"] <= MAXIMUM_REDUCTION_THRESHOLD:

                        # Create a word cloud, upload it to Imgur and get back its URL.
                        image_url = cloud.generate_word_cloud(
                            summary_dict["article_words"])

                        # Start building the comment body.
                        post_body = ""

                        for sentence in summary_dict["top_sentences"]:
                            post_body += "> {}\n\n".format(sentence)

                        top_words = ""

                        for index, word in enumerate(summary_dict["top_words"]):
                            top_words += "{}^#{} ".format(word, index + 1)

                        post_message = HEADER.format(
                            summary_dict["title"], submission.url,
                            summary_dict["reduction"]) + post_body + FOOTER.format(image_url, top_words)

                        submission.reply(post_message)
                        update_log(submission.id)
                        print("Replied to:", submission.id)
                    else:
                        update_log(submission.id)
                        print("Skipped:", submission.id)
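
# The first version of init() delegates the download-and-parse step to an
# extract_article_from_url() helper that is not shown here. The sketch below is
# only an illustration of what such a helper could look like, assuming a plain
# requests download and a BeautifulSoup pass that keeps the <title> text and
# the paragraph text; the parsing strategy is an assumption, not the original
# implementation.

import requests
from bs4 import BeautifulSoup


def extract_article_from_url(url):
    """Downloads the page at `url` and returns an (article, title) tuple."""

    with requests.get(url, timeout=10) as response:
        # Requests sometimes guesses the encoding incorrectly; force UTF-8 in that case.
        if response.encoding == "ISO-8859-1":
            response.encoding = "utf-8"
        html_source = response.text

    soup = BeautifulSoup(html_source, "html.parser")
    title = soup.title.get_text(strip=True) if soup.title else ""
    article = "\n".join(p.get_text(" ", strip=True) for p in soup.find_all("p"))

    return article, title
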
def init():
    """Inits the bot."""

    reddit = praw.Reddit(client_id=config.APP_ID,
                         client_secret=config.APP_SECRET,
                         user_agent=config.USER_AGENT,
                         username=config.REDDIT_USERNAME,
                         password=config.REDDIT_PASSWORD)

    processed_posts = load_log()
    whitelist = load_whitelist()

    for subreddit in config.SUBREDDITS:

        for submission in reddit.subreddit(subreddit).new(limit=50):

            if submission.id not in processed_posts:

                clean_url = submission.url.replace("amp.", "")
                ext = tldextract.extract(clean_url)
                domain = "{}.{}".format(ext.domain, ext.suffix)

                if domain in whitelist:

                    try:
                        with requests.get(clean_url, headers=HEADERS, timeout=10) as response:

                            # Requests sometimes guesses the encoding incorrectly; force UTF-8 in that case.
                            if response.encoding == "ISO-8859-1":
                                response.encoding = "utf-8"

                            html_source = response.text

                        article_title, article_date, article_body = scraper.scrape_html(
                            html_source)
                        summary_dict = summary.get_summary(article_body)
                    except Exception as e:
                        log_error("{},{}".format(clean_url, e))
                        update_log(submission.id)
                        print("Failed:", submission.id)
                        continue

                    # To avoid low-quality replies, we only process submissions whose summary achieved a meaningful reduction.
                    if MINIMUM_REDUCTION_THRESHOLD <= summary_dict["reduction"] <= MAXIMUM_REDUCTION_THRESHOLD:

                        # Create a word cloud, upload it to Imgur and get back its URL.
                        image_url = cloud.generate_word_cloud(
                            summary_dict["article_words"])

                        # Start building the comment body.
                        post_body = "\n\n".join(
                            ["> " + item for item in summary_dict["top_sentences"]])

                        top_words = ""

                        for index, word in enumerate(summary_dict["top_words"]):
                            top_words += "{}^#{} ".format(word, index + 1)

                        post_message = TEMPLATE.format(
                            article_title, clean_url, summary_dict["reduction"],
                            article_date, post_body, image_url, top_words)

                        submission.reply(post_message)
                        update_log(submission.id)
                        print("Replied to:", submission.id)
                    else:
                        update_log(submission.id)
                        print("Skipped:", submission.id)
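
# Both versions of init() rely on a handful of small persistence helpers
# (load_log, update_log, load_whitelist, log_error) that are not shown here.
# The sketch below assumes the processed-posts log, the domain whitelist and
# the error log are plain newline-delimited text files; the file names are
# illustrative assumptions, not the original ones.

import os

POSTS_LOG = "processed_posts.txt"   # assumed file name
WHITELIST_FILE = "whitelist.txt"    # assumed file name
ERROR_LOG = "error.log"             # assumed file name


def load_log():
    """Loads the ids of already processed submissions."""

    if not os.path.exists(POSTS_LOG):
        return []

    with open(POSTS_LOG, "r", encoding="utf-8") as log_file:
        return log_file.read().splitlines()


def update_log(post_id):
    """Appends a submission id to the processed-posts log."""

    with open(POSTS_LOG, "a", encoding="utf-8") as log_file:
        log_file.write("{}\n".format(post_id))


def load_whitelist():
    """Loads the whitelisted news domains."""

    with open(WHITELIST_FILE, "r", encoding="utf-8") as whitelist_file:
        return whitelist_file.read().splitlines()


def log_error(error_message):
    """Appends an error message to the error log."""

    with open(ERROR_LOG, "a", encoding="utf-8") as error_file:
        error_file.write("{}\n".format(error_message))


if __name__ == "__main__":
    init()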