def test_add_analytics_method_nosave_flag(self):
    parser = MakeArgs.parser_for_testing_cli()
    Cli.Parser()._add_analytics(parser)

    args = parser.parse_args(["--nosave"])

    assert args.nosave == True

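# The tests in this section assume that `sys`, `Cli` (the URS CLI module), and the
# `MakeArgs` test fixture are imported at the top of the file. The MakeArgs helper is
# not shown in this section; a minimal sketch of what `parser_for_testing_cli()` is
# assumed to return -- a bare ArgumentParser that the `_add_*` methods can attach
# flags to -- might look like the following. This is an illustrative assumption, not
# necessarily the project's actual fixture:
#
#     import argparse
#
#     class MakeArgs():
#         @staticmethod
#         def parser_for_testing_cli():
#             # A plain parser; argparse raises SystemExit on invalid input,
#             # which several tests below rely on.
#             return argparse.ArgumentParser()
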
def test_parser_init_method_epilog_instance_variable(self):
    epilog = r"""
[PRAW SUBREDDIT SCRAPING]

Subreddit categories:
    h     selecting Hot category
    n     selecting New category
    c     selecting Controversial category (time filter available)
    t     selecting Top category (time filter available)
    r     selecting Rising category
    s     selecting Search category (time filter available)

Subreddit time filters:
    all (default)
    day
    hour
    month
    week
    year

[ANALYTICAL TOOLS]

wordcloud export options:
    eps     Encapsulated Postscript
    jpeg
    jpg
    pdf
    png
    ps      Postscript
    rgba    Raw RGBA bitmap
    tif
    tiff
"""

    assert Cli.Parser()._epilog == epilog

def test_add_flags_method_skip_confirmation_flag(self):
    parser = MakeArgs.parser_for_testing_cli()
    Cli.Parser()._add_flags(parser)

    args = parser.parse_args(["-y"])

    assert args.y == True

def test_add_flags_method_basic_flag(self):
    parser = MakeArgs.parser_for_testing_cli()
    Cli.Parser()._add_flags(parser)

    args = parser.parse_args(["--basic"])

    assert args.basic == True

def test_add_praw_subreddit_options_flag(self):
    parser = MakeArgs.parser_for_testing_cli()
    Cli.Parser()._add_praw_subreddit_options(parser)

    args = parser.parse_args(["--rules"])

    assert args.rules == True

def test_add_praw_comments_options_flag(self):
    parser = MakeArgs.parser_for_testing_cli()
    Cli.Parser()._add_praw_comments_options(parser)

    args = parser.parse_args(["--raw"])

    assert args.raw == True

def test_add_examples_flag_method_examples_flag(self):
    parser = MakeArgs.parser_for_testing_cli()
    Cli.Parser()._add_examples_flag(parser)

    args = parser.parse_args(["-e"])

    assert args.examples == True

def test_add_rate_limit_check_flag_method_check_flag(self):
    parser = MakeArgs.parser_for_testing_cli()
    Cli.Parser()._add_rate_limit_check_flag(parser)

    args = parser.parse_args(["--check"])

    assert args.check == True

def test_add_export_method_json_flag(self):
    parser = MakeArgs.parser_for_testing_cli()
    Cli.Parser()._add_export(parser)

    args = parser.parse_args(["--json"])

    assert args.json == True

def test_parser_init_method_usage_instance_variable(self):
    usage = r"""$ Urs.py
    [-h]
    [-e]
    [--check]

    [-r <subreddit> <(h|n|c|t|r|s)> <n_results_or_keywords> [<optional_time_filter>]]
        [--rules]
    [-u <redditor> <n_results>]
    [-c <submission_url> <n_results>]
        [--raw]
    [-b]

    [-f <file_path>]
    [-wc <file_path> [<optional_export_format>]]
        [--nosave]

    [-y]

    [--csv]
"""

    assert Cli.Parser()._usage == usage

def test_add_praw_livestream_options_flag(self):
    parser = MakeArgs.parser_for_testing_cli()
    Cli.Parser()._add_praw_livestream_options(parser)

    args = parser.parse_args(["--stream-submissions"])

    assert args.stream_submissions == True

def test_add_praw_livestream_flags_method_live_redditor_flag(self):
    parser = MakeArgs.parser_for_testing_cli()
    Cli.Parser()._add_praw_livestream_flags(parser)

    args = parser.parse_args("--live-redditor spez".split())

    assert args.live_redditor == "spez"

def test_add_praw_livestream_flags_method_live_subreddit_flag(self):
    parser = MakeArgs.parser_for_testing_cli()
    Cli.Parser()._add_praw_livestream_flags(parser)

    args = parser.parse_args("--live-subreddit askreddit".split())

    assert args.live_subreddit == "askreddit"

def test_parser_init_method_description_instance_variable(self):
    description = r"""
Universal Reddit Scraper v3.2.1 - a comprehensive Reddit scraping tool

Author: Joseph Lai
Contact: [email protected]
"""

    assert Cli.Parser()._description == description

def test_parse_args_method_no_args_were_entered(self):
    sys.argv = [sys.argv[0]]

    try:
        _, _ = Cli.Parser().parse_args()
        assert False
    except SystemExit:
        assert True

def test_add_analytics_method_frequencies_flag(self):
    test_subreddit_args = [["test_file"]]

    parser = MakeArgs.parser_for_testing_cli()
    Cli.Parser()._add_analytics(parser)

    args = parser.parse_args("--frequencies test_file".split())

    assert args.frequencies == test_subreddit_args

def test_add_analytics_method_wordcloud_flag(self):
    test_subreddit_args = [["test_file"]]

    parser = MakeArgs.parser_for_testing_cli()
    Cli.Parser()._add_analytics(parser)

    args = parser.parse_args("--wordcloud test_file".split())

    assert args.wordcloud == test_subreddit_args

def test_add_flags_method_comments_flag(self):
    test_subreddit_args = [["test_url", "10"]]

    parser = MakeArgs.parser_for_testing_cli()
    Cli.Parser()._add_flags(parser)

    args = parser.parse_args("--comments test_url 10".split())

    assert args.comments == test_subreddit_args

def test_add_flags_method_redditor_flag(self):
    test_subreddit_args = [["test_redditor", "10"]]

    parser = MakeArgs.parser_for_testing_cli()
    Cli.Parser()._add_flags(parser)

    args = parser.parse_args("--redditor test_redditor 10".split())

    assert args.redditor == test_subreddit_args

def test_add_flags_method_subreddit_flag(self):
    test_subreddit_args = [["test_subreddit", "h", "10"]]

    parser = MakeArgs.parser_for_testing_cli()
    Cli.Parser()._add_flags(parser)

    args = parser.parse_args("--subreddit test_subreddit h 10".split())

    assert args.subreddit == test_subreddit_args

def test_parse_args_method_comments_and_csv_flags(self):
    sys.argv = [sys.argv[0]]

    input_args = ["--comments", "test_url", "10", "--csv"]
    for arg in input_args:
        sys.argv.append(arg)

    args, _ = Cli.Parser().parse_args()

    assert args.comments == [["test_url", "10"]]
    assert args.csv == True

def test_parse_args_method_redditor_and_csv_flags(self):
    sys.argv = [sys.argv[0]]

    input_args = ["--redditor", "test_redditor", "10", "--csv"]
    for arg in input_args:
        sys.argv.append(arg)

    args, _ = Cli.Parser().parse_args()

    assert args.redditor == [["test_redditor", "10"]]
    assert args.csv == True

def test_parse_args_method_examples_flag_was_included(self):
    sys.argv = [sys.argv[0]]
    sys.argv.append("-e")

    try:
        _, _ = Cli.Parser().parse_args()
    except SystemExit:
        assert True

    sys.argv = [sys.argv[0]]

def test_parse_args_method_subreddit_and_json_flags(self):
    # Reset sys.argv to the program name only before appending test arguments,
    # mirroring the other parse_args tests; argparse reads sys.argv[1:], so the
    # program name must stay in place for the first flag to be parsed.
    sys.argv = [sys.argv[0]]

    input_args = ["--subreddit", "test_subreddit", "h", "10", "--json"]
    for arg in input_args:
        sys.argv.append(arg)

    args, _ = Cli.Parser().parse_args()

    assert args.subreddit == [["test_subreddit", "h", "10"]]
    assert args.json == True

def test_parser_init_method_examples_instance_variable(self):
    examples = r"""
[[EXAMPLES]]

[PRAW SCRAPING]

Arguments:

    [-r <subreddit> <(h|n|c|t|r|s)> <n_results_or_keywords> [<optional_time_filter>]]
        [--rules]
    [-u <redditor> <n_results>]
    [-c <submission_url> <n_results>]
        [--raw]
    [-b]

    [-y]

    [--csv]

All scrape results are exported to JSON by default.

You can run all of these scrapers in one call.

SUBREDDITS

    Get the first 10 posts in r/askreddit in the Hot category and export to JSON:

        $ ./Urs.py -r askreddit h 10

    Search for "United States of America" in r/worldnews and export to CSV by including the `--csv` flag:

        $ ./Urs.py -r worldnews s "United States of America" --csv

    You can apply a time filter when scraping Subreddit categories Controversial, Top, or Search:
    (Scraping Search results from r/learnprogramming from the past month)

        $ ./Urs.py -r learnprogramming s "python developer" month

    You can skip the settings confirmation table and immediately scrape by including the `-y` flag:

        $ ./Urs.py -r cscareerquestions s "job" year -y

    You can add the Subreddit's rules in the scrape results by including the `--rules` flag.
    This only works when you export to JSON:

        $ ./Urs.py -r wallstreetbets t 25 year --rules

    You can also still use URS v1.0.0 (SUBREDDIT SCRAPING ONLY), but you cannot include
    this flag with any items besides export options:

        $ ./Urs.py -b
        $ ./Urs.py -b --csv

REDDITORS

    Scraping 15 results from u/spez's Reddit account:

        $ ./Urs.py -u spez 15

SUBMISSION COMMENTS

    Scraping 25 comments from this r/TIFU post.
    Returns a structured JSON file:

        $ ./Urs.py -c https://www.reddit.com/r/tifu/comments/a99fw9/tifu_by_buying_everyone_an_ancestrydna_kit_and/ 25

    Scraping all comments from the same r/TIFU post.
    Returns a structured JSON file:

        $ ./Urs.py -c https://www.reddit.com/r/tifu/comments/a99fw9/tifu_by_buying_everyone_an_ancestrydna_kit_and/ 0

    You can also return comments in raw format by including the `--raw` flag.
    Ie. top-level first, followed by second-level, then third-level, etc.:

        $ ./Urs.py -c https://www.reddit.com/r/tifu/comments/a99fw9/tifu_by_buying_everyone_an_ancestrydna_kit_and/ 25 --raw
        $ ./Urs.py -c https://www.reddit.com/r/tifu/comments/a99fw9/tifu_by_buying_everyone_an_ancestrydna_kit_and/ 0 --raw

[ANALYTICAL TOOLS]

Arguments:

    [-f <file_path>]
    [-wc <file_path> [<optional_export_format>]]
        [--nosave]

Word frequencies are exported to JSON by default.

Wordclouds are exported to PNG by default.

You can run both of these tools in one call.

WORD FREQUENCIES

    Only return a count of words that are present in submission titles, bodies, and/or comments.
    An example file path is given:

        $ ./Urs.py -f ../scrapes/02-15-2021/subreddits/askreddit-hot-100-results.json

    You can also export to CSV instead by including the `--csv` flag:

        $ ./Urs.py -f ../scrapes/02-15-2021/subreddits/askreddit-hot-100-results.json --csv

WORDCLOUD

    You can also generate a wordcloud based on word frequencies:

        $ ./Urs.py -wc ../scrapes/02-15-2021/subreddits/askreddit-hot-100-results.json

OPTIONAL EXPORT FORMAT

    You can export to formats other than PNG by providing the format after the file path.
    See the help menu for a full list of options. Exporting the wordcloud to JPG:

        $ ./Urs.py -wc ../scrapes/02-15-2021/subreddits/askreddit-hot-100-results.json jpg

DISPLAY INSTEAD OF SAVING

    If you do not wish to save the wordcloud to file, include the `--nosave` flag:

        $ ./Urs.py -wc ../scrapes/02-15-2021/subreddits/askreddit-hot-100-results.json --nosave
"""

    assert Cli.Parser()._examples == examples

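# Assuming the standard pytest layout used by this test suite, these tests can be
# collected and run from the project root with, for example:
#
#     $ pytest -v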