def run_urs(self):
    """
    Dispatch to the URS tool group selected by the parsed CLI arguments.

    At most one group runs per call. Groups are tested in this order:
    credentials check, PRAW scrapers, analytical tools. Nothing is returned.

    Calls previously defined public methods:

        PRAW validation:

            Validation.validate_user()

        PRAW scrapers:

            RunSubreddit.run()
            RunRedditor.run()
            RunComments.run()
            RunBasic.run()

        Analytical tools:

            GenerateFrequencies.generate()
            GenerateWordcloud.generate()
    """

    cli_args = self._args

    # Rate limit check.
    if cli_args.check:
        logging.info("RUNNING API CREDENTIALS CHECK.")
        logging.info("")

        Validation.validate_user(self._parser, self._reddit)
        return

    # PRAW scrapers.
    if (
        cli_args.subreddit
        or cli_args.redditor
        or cli_args.comments
        or cli_args.basic
    ):
        Validation.validate_user(self._parser, self._reddit)

        if cli_args.subreddit:
            RunSubreddit.run(cli_args, self._parser, self._reddit)
        if cli_args.redditor:
            RunRedditor.run(cli_args, self._parser, self._reddit)
        # The basic scraper is only considered when no comments scrape was
        # requested.
        if cli_args.comments:
            RunComments.run(cli_args, self._parser, self._reddit)
        elif cli_args.basic:
            RunBasic.run(cli_args, self._parser, self._reddit)
        return

    # Analytical tools.
    if cli_args.frequencies or cli_args.wordcloud:
        if cli_args.frequencies:
            GenerateFrequencies.generate(cli_args)
        if cli_args.wordcloud:
            GenerateWordcloud.generate(cli_args)
def test_validate_user(self):
    """
    Check that Validation.validate_user() accepts the configured credentials.

    Any failure raised by the validator is re-raised as an AssertionError
    that keeps the original exception as its cause, so the test report shows
    why validation failed.
    """

    parser = MakeArgs.make_scraper_args()
    reddit = Login.create_reddit_object()

    # SystemExit is listed explicitly because it is not an Exception subclass;
    # presumably the validator can abort through the parser - TODO confirm.
    # KeyboardInterrupt is deliberately not caught so Ctrl-C still stops the
    # test run.
    try:
        Validation.validate_user(parser, reddit)
    except (Exception, SystemExit) as error:
        raise AssertionError(f"User validation failed: {error!r}") from error
def test_check_redditor_both_valid_and_invalid_redditors(self):
    """
    Pass two Redditor names to Validation._check_redditors() and expect
    exactly one name in each of the valid and invalid lists afterwards.
    """

    not_found = []
    found = []
    reddit = Login.create_reddit_object()
    redditors = ["spez", "sdhfgiuoh3284th9enbsprgh8-w-wher9ghwe9hw49"]

    # The checker receives the two result lists and the test inspects them
    # afterwards.
    Validation._check_redditors(not_found, redditors, reddit, found)

    assert (len(found), len(not_found)) == (1, 1)
def test_check_redditor_only_valid_redditors(self):
    """
    Pass a single Redditor name to Validation._check_redditors() and expect
    one valid entry and no invalid entries.
    """

    not_found = []
    found = []
    reddit = Login.create_reddit_object()
    redditors = ["spez"]

    Validation._check_redditors(not_found, redditors, reddit, found)

    assert len(found) == 1
    assert len(not_found) == 0
def test_check_subreddits_only_valid_subreddits(self):
    """
    Pass three Subreddit names to Validation._check_subreddits() and expect
    all three in the valid list and none in the invalid list.
    """

    not_found = []
    found = []
    reddit = Login.create_reddit_object()
    subreddits = ["askreddit", "wallstreetbets", "cscareerquestions"]

    Validation._check_subreddits(not_found, subreddits, reddit, found)

    assert len(not_found) == 0
    assert len(found) == 3
def test_check_submissions_only_invalid_submissions(self):
    """
    Pass one URL to Validation._check_submissions() and expect it to end up
    in the invalid list, leaving the valid list empty.
    """

    not_found = []
    found = []
    reddit = Login.create_reddit_object()
    urls = ["https://www.reddit.com/r/heresaninvalidlinkjasdfhuwhrpguhpasdf/"]

    Validation._check_submissions(not_found, urls, reddit, found)

    assert len(found) == 0
    assert len(not_found) == 1
def test_check_submissions_only_valid_submissions(self):
    """
    Pass one submission URL to Validation._check_submissions() and expect
    one valid entry and no invalid entries.
    """

    not_found = []
    found = []
    reddit = Login.create_reddit_object()
    urls = [
        "https://www.reddit.com/r/announcements/comments/mcisdf/an_update_on_the_recent_issues_surrounding_a/"
    ]

    Validation._check_submissions(not_found, urls, reddit, found)

    assert len(found) == 1
    assert len(not_found) == 0
def test_check_submissions_both_valid_and_invalid_submissions(self):
    """
    Pass two URLs to Validation._check_submissions() and expect exactly one
    entry in each of the valid and invalid lists afterwards.
    """

    not_found = []
    found = []
    reddit = Login.create_reddit_object()
    urls = [
        "https://www.reddit.com/r/announcements/comments/mcisdf/an_update_on_the_recent_issues_surrounding_a/",
        "https://www.reddit.com/r/heresaninvalidlinkjasdfhuwhrpguhpasdf/",
    ]

    Validation._check_submissions(not_found, urls, reddit, found)

    assert (len(found), len(not_found)) == (1, 1)
def test_check_subreddits_only_invalid_subreddits(self):
    """
    Pass three Subreddit names to Validation._check_subreddits() and expect
    all three in the invalid list and none in the valid list.
    """

    not_found = []
    found = []
    reddit = Login.create_reddit_object()
    subreddits = [
        "shdg8h342842h3gidbsfgjdbs",
        "asdfhauhwspf8912034812hudfghb979023974ht",
        "xcvhcsxiuvbeidefgh3qw48tr324805tyasdguap;l",
    ]

    Validation._check_subreddits(not_found, subreddits, reddit, found)

    assert len(found) == 0
    assert len(not_found) == 3
def _find_subs(reddit, search_for):
    """
    Split the search string into Subreddit names and check that they exist.

    Calls a method from an external module:

        Validation.check_existence()

    Parameters
    ----------
    reddit: Reddit object
        Reddit instance created by PRAW API credentials
    search_for: str
        Whitespace-separated string denoting Subreddits to scrape for

    Returns
    -------
    not_subs: list
        List of invalid Subreddits
    subs: list
        List of valid Subreddits
    """

    # str.split() without a separator collapses runs of whitespace and drops
    # leading/trailing blanks, so blank input yields [] instead of [""] and no
    # empty name is sent to validation.
    sub_list = search_for.split()
    not_subs, subs = Validation.check_existence(sub_list, reddit, "subreddit")

    return not_subs, subs
def test_check_subreddit_both_valid_and_invalid_subreddits(self):
    """
    Pass seven Subreddit names to Validation._check_subreddits() and expect
    three entries in the valid list and four in the invalid list.
    """

    not_found = []
    found = []
    reddit = Login.create_reddit_object()
    subreddits = [
        "askreddit",
        "wallstreetbets",
        "cscareerquestions",
        "shdg8h342842h3gidbsfgjdbs",
        "asdfhauhwspf8912034812hudfghb979023974ht",
        "xcvhcsxiuvbeidefgh3qw48tr324805tyasdguap;l",
        "u0893-45u238hdusafghudsgh982",
    ]

    Validation._check_subreddits(not_found, subreddits, reddit, found)

    assert (len(found), len(not_found)) == (3, 4)
def _create_settings(args, parser, reddit):
    """
    Build the Subreddit scrape settings from the CLI arguments.

    Calls methods from external modules:

        GetPRAWScrapeSettings().create_list()
        Validation.validate()
        Global.make_list_dict()
        GetPRAWScrapeSettings().get_settings()

    Parameters
    ----------
    args: Namespace
        Namespace object containing all arguments that were defined in the CLI
    parser: ArgumentParser
        argparse ArgumentParser object (not used by this function)
    reddit: Reddit object
        Reddit instance created by PRAW API credentials

    Returns
    -------
    s_master: dict
        Dictionary containing all scrape settings
    """

    scrape_type = "subreddit"

    requested = GetPRAWScrapeSettings().create_list(args, scrape_type)
    invalid_subs, valid_subs = Validation.validate(requested, reddit, scrape_type)

    # The dictionary is created from the valid Subreddits and then handed to
    # get_settings() together with the invalid ones.
    s_master = make_list_dict(valid_subs)
    GetPRAWScrapeSettings().get_settings(args, invalid_subs, s_master, scrape_type)

    return s_master
def _find_subs(parser, reddit, search_for):
    """
    Split the search string into Subreddit names and check that they exist.

    Calls a method from an external module:

        Validation.existence()

    Parameters
    ----------
    parser: ArgumentParser
        argparse ArgumentParser object
    reddit: Reddit object
        Reddit instance created by PRAW API credentials
    search_for: str
        Whitespace-separated string denoting Subreddits to scrape for

    Returns
    -------
    subs: list
        List of valid Subreddits
    not_subs: list
        List of invalid Subreddits
    """

    # str.split() without a separator collapses runs of whitespace and drops
    # leading/trailing blanks, so blank input yields [] instead of [""] and no
    # empty name is sent to validation.
    sub_list = search_for.split()

    # `s_t` is not a parameter of this function; it has to be available at
    # module level (presumably the list of scraper types - verify).
    subs, not_subs = Validation.existence(s_t[0], sub_list, parser, reddit, s_t)

    return subs, not_subs
def _set_info_and_object(args, reddit): """ Set the stream information and Reddit object based on CLI args. Parameters ---------- args: Namespace Namespace object containing all arguments that were defined in the CLI reddit: PRAW Reddit object Returns ------- reddit_object: PRAW Subreddit or Redditor object stream_info: str String denoting the livestream information """ if args.live_subreddit: PRAWTitles.lr_title() Validation.validate([args.live_subreddit], reddit, "subreddit") initial_message = f"Initializing Subreddit livestream for r/{args.live_subreddit}." stream_info = f"in r/{args.live_subreddit}" reddit_object = reddit.subreddit(args.live_subreddit) elif args.live_redditor: PRAWTitles.lu_title() Validation.validate([args.live_redditor], reddit, "redditor") initial_message = f"Initializing Redditor livestream for u/{args.live_redditor}." stream_info = f"by u/{args.live_redditor}" reddit_object = reddit.redditor(args.live_redditor) Halo().info(Fore.CYAN + Style.BRIGHT + initial_message) logging.info(initial_message + "..") Halo().info("New entries will appear when posted to Reddit.") return reddit_object, stream_info
def test_validate_all_valid_reddit_objects(self):
    """
    Run Validation.validate() on three Subreddit names and expect three
    valid entries and no invalid entries.
    """

    reddit = Login.create_reddit_object()
    subreddits = ["askreddit", "wallstreetbets", "cscareerquestions"]

    not_found, found = Validation.validate(subreddits, reddit, "subreddit")

    assert len(found) == 3
    assert len(not_found) == 0
def test_check_existence_both_valid_and_invalid_redditors(self):
    """
    Run Validation.check_existence() on two Redditor names and expect
    exactly one valid and one invalid entry.
    """

    reddit = Login.create_reddit_object()
    redditors = ["spez", "sdhfgiuoh3284th9enbsprgh8-w-wher9ghwe9hw49"]

    not_found, found = Validation.check_existence(redditors, reddit, "redditor")

    assert (len(found), len(not_found)) == (1, 1)
def test_check_existence_only_valid_redditors(self):
    """
    Run Validation.check_existence() on a single Redditor name and expect
    one valid entry and no invalid entries.
    """

    reddit = Login.create_reddit_object()
    redditors = ["spez"]

    not_found, found = Validation.check_existence(redditors, reddit, "redditor")

    assert len(found) == 1
    assert len(not_found) == 0
def test_check_existence_only_invalid_submissions(self):
    """
    Run Validation.check_existence() in "comments" mode on one URL and
    expect one invalid entry and no valid entries.
    """

    reddit = Login.create_reddit_object()
    urls = ["https://www.reddit.com/r/heresaninvalidlinkjasdfhuwhrpguhpasdf/"]

    not_found, found = Validation.check_existence(urls, reddit, "comments")

    assert len(found) == 0
    assert len(not_found) == 1
def test_check_existence_only_valid_submissions(self):
    """
    Run Validation.check_existence() in "comments" mode on one submission
    URL and expect one valid entry and no invalid entries.
    """

    reddit = Login.create_reddit_object()
    urls = [
        "https://www.reddit.com/r/announcements/comments/mcisdf/an_update_on_the_recent_issues_surrounding_a/"
    ]

    not_found, found = Validation.check_existence(urls, reddit, "comments")

    assert len(found) == 1
    assert len(not_found) == 0
def test_check_existence_only_invalid_subreddits(self):
    """
    Run Validation.check_existence() on three Subreddit names and expect
    three invalid entries and no valid entries.
    """

    reddit = Login.create_reddit_object()
    subreddits = [
        "shdg8h342842h3gidbsfgjdbs",
        "asdfhauhwspf8912034812hudfghb979023974ht",
        "xcvhcsxiuvbeidefgh3qw48tr324805tyasdguap;l",
    ]

    not_found, found = Validation.check_existence(subreddits, reddit, "subreddit")

    assert len(found) == 0
    assert len(not_found) == 3
def test_validate_all_invalid_reddit_objects_force_quit(self):
    """
    Run Validation.validate() on three Subreddit names and expect the call
    to end with SystemExit; a normal return fails the test.
    """

    reddit = Login.create_reddit_object()
    subreddits = [
        "shdg8h342842h3gidbsfgjdbs",
        "asdfhauhwspf8912034812hudfghb979023974ht",
        "xcvhcsxiuvbeidefgh3qw48tr324805tyasdguap;l",
    ]

    exited = False
    try:
        _, _ = Validation.validate(subreddits, reddit, "subreddit")
    except SystemExit:
        exited = True

    assert exited
def test_validate_both_valid_and_invalid_reddit_objects(self):
    """
    Run Validation.validate() on six Subreddit names and expect three valid
    and three invalid entries.
    """

    reddit = Login.create_reddit_object()
    subreddits = [
        "askreddit",
        "wallstreetbets",
        "cscareerquestions",
        "shdg8h342842h3gidbsfgjdbs",
        "asdfhauhwspf8912034812hudfghb979023974ht",
        "xcvhcsxiuvbeidefgh3qw48tr324805tyasdguap;l",
    ]

    not_found, found = Validation.validate(subreddits, reddit, "subreddit")

    assert (len(found), len(not_found)) == (3, 3)
def list_submissions(parser, post_list, reddit):
    """
    Check if submissions exist and list posts that are not found.

    Calls a method from an external module:

        Validation.existence()

    Parameters
    ----------
    parser: ArgumentParser
        argparse ArgumentParser object
    post_list: list
        List of submission URLs
    reddit: Reddit object
        Reddit instance created by PRAW API credentials

    Returns
    -------
    not_posts: list
        List of submission URLs that were not found
    posts: list
        List of valid submission URLs

    Raises
    ------
    ValueError
        If none of the submissions passed validation
    """

    print("\nChecking if submission(s) exist...")
    logging.info("Validating submissions...")
    logging.info("")

    # `s_t` is not a parameter of this function; it has to be available at
    # module level (presumably the list of scraper types - verify).
    posts, not_posts = Validation.existence(s_t[2], post_list, parser, reddit, s_t)

    if not_posts:
        print(
            Fore.YELLOW
            + Style.BRIGHT
            + "\nThe following submissions were not found and will be skipped:"
        )
        print(Fore.YELLOW + Style.BRIGHT + "-" * 55)
        print(*not_posts, sep="\n")

        logging.warning("Failed to validate the following submissions:")
        # Passing the collection as a logging argument formats it as one
        # value even if it is a tuple; `"%s" % (x)` would not.
        logging.warning("%s", not_posts)
        logging.warning("Skipping.")
        logging.info("")

    if not posts:
        logging.critical("ALL SUBMISSIONS FAILED VALIDATION.")
        raise ValueError("All submissions failed validation.")

    return not_posts, posts
def list_redditors(parser, reddit, user_list):
    """
    Check if Redditors exist and list Redditors who are not found.

    Calls a public method from an external module:

        Validation.existence()

    Parameters
    ----------
    parser: ArgumentParser
        argparse ArgumentParser object
    reddit: Reddit object
        Reddit instance created by PRAW API credentials
    user_list: list
        List of Redditors

    Returns
    -------
    not_users: list
        List of Redditors who were not found
    users: list
        List of valid Redditors

    Raises
    ------
    ValueError
        If none of the Redditors passed validation
    """

    print("\nChecking if Redditor(s) exist...")
    logging.info("Validating Redditors...")
    logging.info("")

    # `s_t` is not a parameter of this function; it has to be available at
    # module level (presumably the list of scraper types - verify).
    users, not_users = Validation.existence(s_t[1], user_list, parser, reddit, s_t)

    if not_users:
        print(
            Fore.YELLOW
            + Style.BRIGHT
            + "\nThe following Redditors were not found and will be skipped:"
        )
        print(Fore.YELLOW + Style.BRIGHT + "-" * 59)
        print(*not_users, sep="\n")

        logging.warning("Failed to validate the following Redditors:")
        # Passing the collection as a logging argument formats it as one
        # value even if it is a tuple; `"%s" % (x)` would not.
        logging.warning("%s", not_users)
        logging.warning("Skipping.")
        logging.info("")

    if not users:
        logging.critical("ALL REDDITORS FAILED VALIDATION.")
        raise ValueError("All Redditors failed validation.")

    return not_users, users
def list_subreddits(parser, reddit, s_t, sub_list):
    """
    Check if Subreddits exist and list invalid Subreddits if applicable.

    Calls a method from an external module:

        Validation().existence()

    Parameters
    ----------
    parser: ArgumentParser
        argparse ArgumentParser object
    reddit: Reddit object
        Reddit instance created by PRAW API credentials
    s_t: list
        List of scraper types
    sub_list: list
        List of Subreddits

    Returns
    -------
    not_subs: list
        List of Subreddits that were not found
    subs: list
        List of valid Subreddits

    Raises
    ------
    ValueError
        If none of the Subreddits passed validation
    """

    print("\nChecking if Subreddit(s) exist...")
    logging.info("Validating Subreddits...")
    logging.info("")

    subs, not_subs = Validation().existence(s_t[0], sub_list, parser, reddit, s_t)

    if not_subs:
        print(
            Fore.YELLOW
            + Style.BRIGHT
            + "\nThe following Subreddits were not found and will be skipped:"
        )
        print(Fore.YELLOW + Style.BRIGHT + "-" * 60)
        print(*not_subs, sep="\n")

        logging.warning("Failed to validate the following Subreddits:")
        # Passing the collection as a logging argument formats it as one
        # value even if it is a tuple; `"%s" % (x)` would not.
        logging.warning("%s", not_subs)
        logging.warning("Skipping.")
        logging.info("")

    if not subs:
        logging.critical("ALL SUBREDDITS FAILED VALIDATION.")
        raise ValueError("All Subreddits failed validation.")

    return not_subs, subs
def run(args, parser, reddit):
    """
    Run the submission comments scraper.

    Calls a previously defined public method:

        Write.write()

    Calls public methods from external modules:

        PRAWTitles.c_title()
        GetPRAWScrapeSettings().create_list()
        Validation.validate()
        Global.make_none_dict()
        GetPRAWScrapeSettings().get_settings()

    Parameters
    ----------
    args: Namespace
        Namespace object containing all arguments that were defined in the CLI
    parser: ArgumentParser
        argparse ArgumentParser object (not used by this function)
    reddit: Reddit object
        Reddit instance created by PRAW API credentials

    Returns
    -------
    c_master: dict
        Dictionary containing all submission comments scrape settings
    """

    scrape_type = "comments"

    PRAWTitles.c_title()

    requested_posts = GetPRAWScrapeSettings().create_list(args, scrape_type)
    not_posts, posts = Validation.validate(requested_posts, reddit, scrape_type)

    # The dictionary is created from the valid submissions and then handed to
    # get_settings() together with the invalid ones.
    c_master = make_none_dict(posts)
    GetPRAWScrapeSettings().get_settings(args, not_posts, c_master, scrape_type)

    Write.write(args, c_master, reddit)

    return c_master
def run(args, parser, reddit):
    """
    Run the Redditor scraper and write the scraped information.

    Calls a previously defined public method:

        Write.write()

    Calls public methods from external modules:

        PRAWTitles.u_title()
        GetPRAWScrapeSettings().create_list()
        Validation.validate()
        Global.make_none_dict()
        GetPRAWScrapeSettings().get_settings()

    Parameters
    ----------
    args: Namespace
        Namespace object containing all arguments that were defined in the CLI
    parser: ArgumentParser
        argparse ArgumentParser object
    reddit: Reddit object
        Reddit instance created by PRAW API credentials

    Returns
    -------
    u_master: dict
        Dictionary containing all Redditor scrape settings
    """

    scrape_type = "redditor"

    PRAWTitles.u_title()

    requested_users = GetPRAWScrapeSettings().create_list(args, scrape_type)
    # NOTE(review): this call passes `parser` as the second argument, while
    # other Validation.validate() call sites pass (objects, reddit, type) -
    # confirm which signature is current.
    not_users, users = Validation.validate(requested_users, parser, reddit, scrape_type)

    # The dictionary is created from the valid Redditors and then handed to
    # get_settings() together with the invalid ones.
    u_master = make_none_dict(users)
    GetPRAWScrapeSettings().get_settings(args, not_users, u_master, scrape_type)

    Write.write(args, reddit, u_master)

    return u_master
def run_urs(self):
    """
    Dispatch to the URS tool group selected by the parsed CLI arguments.

    At most one group runs per call. Groups are tested in this order:
    credentials check, directory tree, PRAW scrapers, PRAW livestream
    scrapers, analytical tools. Nothing is returned.

    Calls previously defined public methods:

        PRAW validation:

            Validation.validate_user()

        Directory tree:

            DateTree.display_tree()

        PRAW scrapers:

            RunSubreddit.run()
            RunRedditor.run()
            RunComments.run()
            RunBasic.run()

        PRAW livestream scrapers:

            Livestream.stream()

        Analytical tools:

            GenerateFrequencies.generate()
            GenerateWordcloud.generate()
    """

    cli_args = self._args

    # Rate limit check.
    if cli_args.check:
        logging.info("RUNNING API CREDENTIALS CHECK.")
        logging.info("")

        Validation.validate_user(self._parser, self._reddit)
        return

    # Display visual directory tree for a date (default is the current date).
    if cli_args.tree:
        DateTree.display_tree(cli_args.tree)
        return

    # PRAW scrapers.
    if (
        cli_args.subreddit
        or cli_args.redditor
        or cli_args.comments
        or cli_args.basic
    ):
        Validation.validate_user(self._parser, self._reddit)

        if cli_args.subreddit:
            RunSubreddit.run(cli_args, self._parser, self._reddit)
        if cli_args.redditor:
            RunRedditor.run(cli_args, self._parser, self._reddit)
        # The basic scraper is only considered when no comments scrape was
        # requested.
        if cli_args.comments:
            RunComments.run(cli_args, self._parser, self._reddit)
        elif cli_args.basic:
            RunBasic.run(cli_args, self._parser, self._reddit)
        return

    # PRAW livestream scrapers.
    if cli_args.live_subreddit or cli_args.live_redditor:
        Validation.validate_user(self._parser, self._reddit)

        Livestream.stream(cli_args, self._reddit)
        return

    # Analytical tools.
    if cli_args.frequencies or cli_args.wordcloud:
        if cli_args.frequencies:
            GenerateFrequencies.generate(cli_args)
        if cli_args.wordcloud:
            GenerateWordcloud.generate(cli_args)