def _collect_others(cat_i, search_for, sub, subreddit, time_filter):
    """
    Return the submissions listing for any category other than Search.

    Announces the scrape through Halo, then delegates to the previously
    defined private method: GetSubmissionsSwitch().scrape_sub()

    Parameters
    ----------
    cat_i: str
        Abbreviated category; it is looked up in the `short_cat` list, so it
        must be one of that list's elements
    search_for: str
        Value printed ahead of the category name in the progress message and
        forwarded to GetSubmissionsSwitch (presumably the number of results
        to return - confirm against the caller)
    sub: str
        String denoting Subreddit name
    subreddit: PRAW Subreddit object
    time_filter: str or None
        String denoting time filter to apply, or None when no filter is set

    Returns
    -------
    category_submissions: PRAW ListingGenerator
        Whatever GetSubmissionsSwitch().scrape_sub() returns for the category

    Raises
    ------
    ValueError
        If `cat_i` is not found in `short_cat` (assuming `short_cat` is a
        list, as the lookup suggests)
    """

    # Resolve the position once; it selects both the display name and the
    # scrape_sub() branch.
    index = short_cat.index(cat_i)
    category = categories[index]

    Halo().info(
        "Processing %s %s results from r/%s." % (search_for, category, sub))

    # Identity check: None is a singleton, and only a real filter string can
    # be capitalized.
    if time_filter is not None:
        Halo().info("Time filter: %s" % time_filter.capitalize())

    return GetSubmissionsSwitch(
        search_for, subreddit, time_filter).scrape_sub(index)
def _format_subreddit_log(settings_dict):
    """
    Write the Subreddit scraping settings to the log.

    Parameters
    ----------
    settings_dict: dict
        Dictionary containing Subreddit scraping settings. Each key is a
        Subreddit name and each value is an iterable of settings, where a
        setting is indexed as [0] abbreviated category, [1] number of results
        or keywords, [2] time filter

    Returns
    -------
    None
    """

    known_filters = ("day", "hour", "month", "week", "year")

    for sub_name, sub_settings in settings_dict.items():
        for setting in sub_settings:
            abbreviation = setting[0]
            target = setting[1]
            applied_filter = setting[2]

            # Only recognized time filters get their own log line.
            if applied_filter in known_filters:
                filtered_category = categories[
                    short_cat.index(abbreviation.upper())]
                logging.info(
                    "Getting posts from the past %s for %s results."
                    % (applied_filter, filtered_category))

            # "s" marks a keyword search; every other abbreviation is a
            # regular category scrape.
            if abbreviation.lower() != "s":
                scraped_category = categories[
                    short_cat.index(abbreviation.upper())]
                logging.info(
                    "Scraping r/%s for %s %s results..."
                    % (sub_name, target, scraped_category))
            else:
                logging.info(
                    "Searching and scraping r/%s for posts containing '%s'..."
                    % (sub_name, target))

            # NOTE(review): blank separator emitted once per setting; the
            # original nesting level of this call should be confirmed.
            logging.info("")
def make_json_skeleton(cat_i, search_for, sub, time_filter):
    """
    Build the top-level dictionary for a JSON export, with the scrape details
    stored first and an empty slot for the scraped data.

    Parameters
    ----------
    cat_i: str
        String denoting the shortened category in the `short_cat` list
    search_for: str
        String denoting n_results returned or keywords searched for
    sub: str
        String denoting the Subreddit name
    time_filter: str
        String denoting the time filter applied to the scrape

    Returns
    -------
    skeleton: dict
        Dictionary with a "scrape_settings" entry describing the scrape and a
        "data" entry initialized to None
    """

    # The full category name is stored lowercased.
    category_name = categories[short_cat.index(cat_i)].lower()

    scrape_settings = {
        "subreddit": sub,
        "category": category_name,
        "n_results_or_keywords": search_for,
        "time_filter": time_filter,
    }

    return {"scrape_settings": scrape_settings, "data": None}
def _add_each_setting(pretty_subs, s_master):
    """
    Add each Subreddit setting to the PrettyTable.

    One row is added per setting, in the order: Subreddit name, full category
    name, capitalized time filter (or None), and the setting's second item.

    Parameters
    ----------
    pretty_subs: PrettyTable
        PrettyTable instance
    s_master: dict
        Dictionary containing all scrape settings. Each key is a Subreddit
        name and each value is an iterable of settings, where a setting is
        indexed as [0] abbreviated category, [1] number of results or
        keywords, [2] time filter or None

    Returns
    -------
    None
    """

    for sub, settings in s_master.items():
        for each_sub in settings:
            cat_i = short_cat.index(each_sub[0].upper())

            # A missing time filter stays None in the table; only real filter
            # strings are capitalized.
            time_filter = each_sub[2]
            if time_filter is not None:
                time_filter = time_filter.capitalize()

            pretty_subs.add_row(
                [sub, categories[cat_i], time_filter, each_sub[1]])
def _r_category(self, cat_i, category_n):
    """
    Pick a Subreddit category name based on a numeric key.

    Parameters
    ----------
    cat_i: str
        String denoting the abbreviated category
    category_n: int
        Integer denoting which name to return: 0 returns the sixth entry of
        `categories`; 1 returns the full name matching `cat_i`, or None when
        `cat_i` equals the sixth entry of `short_cat`

    Returns
    -------
    category: str
        The category name, or None when `category_n` is neither 0 nor 1
    """

    # Both candidates are resolved before the key is looked at, so an
    # abbreviation missing from `short_cat` raises whatever `category_n` is.
    # NOTE(review): index 5 presumably denotes the Search category - confirm
    # against the `categories`/`short_cat` definitions.
    fixed_name = categories[5]

    if cat_i == short_cat[5]:
        matched_name = None
    else:
        matched_name = categories[short_cat.index(cat_i)]

    names_by_key = {0: fixed_name, 1: matched_name}

    return names_by_key.get(category_n)
def _collect_others(args, cat_i, search_for, sub, subreddit, time_filter):
    """
    Return the posts listing for any category other than Search.

    Prints the scrape details, then delegates to the previously defined
    private method: GetPostsSwitch().scrape_sub()

    Parameters
    ----------
    args: Namespace
        Namespace object containing all arguments that were defined in the
        CLI. Not read by this function; kept so existing callers still work
    cat_i: str
        Abbreviated category; it is looked up in the `short_cat` list, so it
        must be one of that list's elements
    search_for: str
        Value printed ahead of the category name in the progress message and
        forwarded to GetPostsSwitch (presumably the number of results to
        return - confirm against the caller)
    sub: str
        String denoting Subreddit name
    subreddit: PRAW Subreddit object
    time_filter: str or None
        String denoting time filter to apply, or None when no filter is set

    Returns
    -------
    category_submissions: PRAW ListingGenerator
        Whatever GetPostsSwitch().scrape_sub() returns for the category

    Raises
    ------
    ValueError
        If `cat_i` is not found in `short_cat` (assuming `short_cat` is a
        list, as the lookup suggests)
    """

    # Resolve the position once; it selects both the display name and the
    # scrape_sub() branch.
    index = short_cat.index(cat_i)
    category = categories[index]

    print(Style.BRIGHT + "\nProcessing %s %s results from r/%s..."
          % (search_for, category, sub))

    # Identity check: None is a singleton, and only a real filter string can
    # be capitalized.
    if time_filter is not None:
        print(Style.BRIGHT + "Time filter: %s" % time_filter.capitalize())

    return GetPostsSwitch(search_for, subreddit, time_filter).scrape_sub(index)