Ejemplo n.º 1
0
    def run_urs(self):
        """
        Dispatch to the URS tool group selected by the parsed CLI arguments.

        At most one of the following groups runs; they are checked in this
        order:

            API credentials check (`check`):

                Validation.validate_user()

            PRAW scrapers (`subreddit`, `redditor`, `comments`, `basic`),
            preceded by a credentials validation:

                RunSubreddit.run()
                RunRedditor.run()
                RunComments.run()
                RunBasic.run()

            Analytical tools (`frequencies`, `wordcloud`):

                GenerateFrequencies.generate()
                GenerateWordcloud.generate()

        Nothing is run when none of these flags are set.
        """

        args = self._args

        # API credentials check.
        if args.check:
            logging.info("RUNNING API CREDENTIALS CHECK.")
            logging.info("")

            Validation.validate_user(self._parser, self._reddit)
            return

        # PRAW scrapers.
        if args.subreddit or args.redditor or args.comments or args.basic:
            Validation.validate_user(self._parser, self._reddit)

            if args.subreddit:
                RunSubreddit.run(args, self._parser, self._reddit)
            if args.redditor:
                RunRedditor.run(args, self._parser, self._reddit)
            # The basic scraper is only reached when the comments scraper was
            # not requested.
            if args.comments:
                RunComments.run(args, self._parser, self._reddit)
            elif args.basic:
                RunBasic.run(args, self._parser, self._reddit)
            return

        # Analytical tools.
        if args.frequencies:
            GenerateFrequencies.generate(args)
        if args.wordcloud:
            GenerateWordcloud.generate(args)
Ejemplo n.º 2
0
    def test_validate_user(self):
        """
        Verify that Validation.validate_user() completes without raising when
        given the parser from MakeArgs.make_scraper_args() and the Reddit
        object from Login.create_reddit_object().
        """

        parser = MakeArgs.make_scraper_args()
        reddit = Login.create_reddit_object()

        try:
            Validation.validate_user(parser, reddit)
        except (Exception, SystemExit) as error:
            # Fail explicitly but keep the original error chained so the cause
            # shows up in the test report. KeyboardInterrupt is deliberately
            # not caught so a test run can still be aborted.
            raise AssertionError(
                f"Validation.validate_user() raised {error!r}"
            ) from error
Ejemplo n.º 3
0
    def test_check_redditor_both_valid_and_invalid_redditors(self):
        """
        Pass one name expected to validate and one expected not to through
        Validation._check_redditors(); each output list should gain one entry.
        """

        reddit = Login.create_reddit_object()
        valid = []
        invalid = []

        redditors = ["spez", "sdhfgiuoh3284th9enbsprgh8-w-wher9ghwe9hw49"]

        # The helper fills the two lists in place.
        Validation._check_redditors(invalid, redditors, reddit, valid)

        assert len(valid) == 1
        assert len(invalid) == 1
Ejemplo n.º 4
0
    def test_check_redditor_only_valid_redditors(self):
        """
        Pass a single name expected to validate through
        Validation._check_redditors(); the invalid list should stay empty.
        """

        reddit = Login.create_reddit_object()
        valid = []
        invalid = []

        redditors = ["spez"]

        # The helper fills the two lists in place.
        Validation._check_redditors(invalid, redditors, reddit, valid)

        assert len(valid) == 1
        assert not invalid
Ejemplo n.º 5
0
    def test_check_subreddits_only_valid_subreddits(self):
        """
        Pass three names expected to validate through
        Validation._check_subreddits(); the invalid list should stay empty.
        """

        reddit = Login.create_reddit_object()
        valid = []
        invalid = []

        subreddits = ["askreddit", "wallstreetbets", "cscareerquestions"]

        # The helper fills the two lists in place.
        Validation._check_subreddits(invalid, subreddits, reddit, valid)

        assert not invalid
        assert len(valid) == 3
Ejemplo n.º 6
0
    def test_check_submissions_only_invalid_submissions(self):
        """
        Pass a single URL expected to fail validation through
        Validation._check_submissions(); the valid list should stay empty.
        """

        reddit = Login.create_reddit_object()
        valid = []
        invalid = []

        bad_url = "https://www.reddit.com/r/heresaninvalidlinkjasdfhuwhrpguhpasdf/"
        urls = [bad_url]

        # The helper fills the two lists in place.
        Validation._check_submissions(invalid, urls, reddit, valid)

        assert not valid
        assert len(invalid) == 1
Ejemplo n.º 7
0
    def test_check_submissions_only_valid_submissions(self):
        """
        Pass a single URL expected to validate through
        Validation._check_submissions(); the invalid list should stay empty.
        """

        reddit = Login.create_reddit_object()
        valid = []
        invalid = []

        good_url = "https://www.reddit.com/r/announcements/comments/mcisdf/an_update_on_the_recent_issues_surrounding_a/"
        urls = [good_url]

        # The helper fills the two lists in place.
        Validation._check_submissions(invalid, urls, reddit, valid)

        assert len(valid) == 1
        assert not invalid
Ejemplo n.º 8
0
    def test_check_submissions_both_valid_and_invalid_submissions(self):
        """
        Pass one URL expected to validate and one expected not to through
        Validation._check_submissions(); each output list should gain one
        entry.
        """

        reddit = Login.create_reddit_object()
        valid = []
        invalid = []

        good_url = "https://www.reddit.com/r/announcements/comments/mcisdf/an_update_on_the_recent_issues_surrounding_a/"
        bad_url = "https://www.reddit.com/r/heresaninvalidlinkjasdfhuwhrpguhpasdf/"
        urls = [good_url, bad_url]

        # The helper fills the two lists in place.
        Validation._check_submissions(invalid, urls, reddit, valid)

        assert len(valid) == 1
        assert len(invalid) == 1
Ejemplo n.º 9
0
    def test_check_subreddits_only_invalid_subreddits(self):
        """
        Pass three names expected to fail validation through
        Validation._check_subreddits(); the valid list should stay empty.
        """

        reddit = Login.create_reddit_object()
        valid = []
        invalid = []

        subreddits = [
            "shdg8h342842h3gidbsfgjdbs",
            "asdfhauhwspf8912034812hudfghb979023974ht",
            "xcvhcsxiuvbeidefgh3qw48tr324805tyasdguap;l",
        ]

        # The helper fills the two lists in place.
        Validation._check_subreddits(invalid, subreddits, reddit, valid)

        assert not valid
        assert len(invalid) == 3
Ejemplo n.º 10
0
    def _find_subs(reddit, search_for):
        """
        Split a whitespace-delimited string of Subreddit names and sort the
        names into invalid and valid Subreddits.

        Calls a method from an external module:

            Validation.check_existence()

        Parameters
        ----------
        reddit: Reddit object
            Reddit instance created by PRAW API credentials
        search_for: str
            String denoting Subreddits to scrape for

        Returns
        -------
        not_subs: list
            List of invalid Subreddits
        subs: list
            List of valid Subreddits
        """

        # Collapse each run of whitespace into a single space, then split on
        # that single space.
        normalized = " ".join(search_for.split())
        names = normalized.split(" ")

        invalid_subs, valid_subs = Validation.check_existence(
            names, reddit, "subreddit"
        )

        return invalid_subs, valid_subs
Ejemplo n.º 11
0
    def test_check_subreddit_both_valid_and_invalid_subreddits(self):
        """
        Pass three names expected to validate and four expected not to through
        Validation._check_subreddits() and check how they are split.
        """

        reddit = Login.create_reddit_object()
        valid = []
        invalid = []

        expected_valid = ["askreddit", "wallstreetbets", "cscareerquestions"]
        expected_invalid = [
            "shdg8h342842h3gidbsfgjdbs",
            "asdfhauhwspf8912034812hudfghb979023974ht",
            "xcvhcsxiuvbeidefgh3qw48tr324805tyasdguap;l",
            "u0893-45u238hdusafghudsgh982",
        ]
        subreddits = expected_valid + expected_invalid

        # The helper fills the two lists in place.
        Validation._check_subreddits(invalid, subreddits, reddit, valid)

        assert len(valid) == 3
        assert len(invalid) == 4
Ejemplo n.º 12
0
    def _create_settings(args, parser, reddit):
        """
        Build the Subreddit scrape settings dictionary from the CLI arguments.

        Calls methods from external modules:

            GetPRAWScrapeSettings().create_list()
            Validation.validate()
            Global.make_list_dict()
            GetPRAWScrapeSettings().get_settings()

        Parameters
        ----------
        args: Namespace
            Namespace object containing all arguments that were defined in the CLI
        parser: ArgumentParser
            argparse ArgumentParser object (not used by this method)
        reddit: Reddit object
            Reddit instance created by PRAW API credentials

        Returns
        -------
        s_master: dict
            Dictionary containing all scrape settings
        """

        requested_subs = GetPRAWScrapeSettings().create_list(args, "subreddit")
        invalid_subs, valid_subs = Validation.validate(
            requested_subs, reddit, "subreddit"
        )

        # Start from the validated Subreddits, then let get_settings() fill in
        # the dictionary.
        s_master = make_list_dict(valid_subs)
        GetPRAWScrapeSettings().get_settings(
            args, invalid_subs, s_master, "subreddit"
        )

        return s_master
Ejemplo n.º 13
0
    def _find_subs(parser, reddit, search_for):
        """
        Split a whitespace-delimited string of Subreddit names and sort the
        names into valid and invalid Subreddits.

        Calls a method from an external module:

            Validation.existence()

        Parameters
        ----------
        parser: ArgumentParser
            argparse ArgumentParser object
        reddit: Reddit object
            Reddit instance created by PRAW API credentials
        search_for: str
            String denoting Subreddits to scrape for

        Returns
        -------
        subs: list
            List of valid Subreddits
        not_subs: list
            List of invalid Subreddits
        """

        # Collapse each run of whitespace into a single space, then split on
        # that single space.
        normalized = " ".join(search_for.split())
        names = normalized.split(" ")

        # NOTE(review): `s_t` is not a parameter of this method; it is
        # presumably a module-level list of scraper types - confirm.
        valid_subs, invalid_subs = Validation.existence(
            s_t[0], names, parser, reddit, s_t
        )

        return valid_subs, invalid_subs
Ejemplo n.º 14
0
    def _set_info_and_object(args, reddit):
        """
        Set the stream information and Reddit object based on CLI args.

        The target is validated with Validation.validate() before the PRAW
        object is created, and the initial status message is displayed and
        logged.

        Parameters
        ----------
        args: Namespace
            Namespace object containing all arguments that were defined in the CLI
        reddit: PRAW Reddit object

        Returns
        -------
        reddit_object: PRAW Subreddit or Redditor object
        stream_info: str
            String denoting the livestream information

        Raises
        ------
        ValueError
            If neither `args.live_subreddit` nor `args.live_redditor` is set
        """

        # Without a livestream target none of the locals below would be bound,
        # so fail with a clear error before producing any output.
        if not args.live_subreddit and not args.live_redditor:
            raise ValueError(
                "Either a live Subreddit or a live Redditor must be specified."
            )

        # `live_subreddit` takes precedence when both targets are given.
        if args.live_subreddit:
            PRAWTitles.lr_title()

            Validation.validate([args.live_subreddit], reddit, "subreddit")

            initial_message = f"Initializing Subreddit livestream for r/{args.live_subreddit}."

            stream_info = f"in r/{args.live_subreddit}"
            reddit_object = reddit.subreddit(args.live_subreddit)

        else:
            PRAWTitles.lu_title()

            Validation.validate([args.live_redditor], reddit, "redditor")

            initial_message = f"Initializing Redditor livestream for u/{args.live_redditor}."

            stream_info = f"by u/{args.live_redditor}"
            reddit_object = reddit.redditor(args.live_redditor)

        Halo().info(Fore.CYAN + Style.BRIGHT + initial_message)
        logging.info(initial_message + "..")
        Halo().info("New entries will appear when posted to Reddit.")

        return reddit_object, stream_info
Ejemplo n.º 15
0
    def test_validate_all_valid_reddit_objects(self):
        """
        Validate three Subreddit names that are all expected to pass;
        Validation.validate() should report no invalid entries.
        """

        reddit = Login.create_reddit_object()
        subreddits = ["askreddit", "wallstreetbets", "cscareerquestions"]

        invalid, valid = Validation.validate(subreddits, reddit, "subreddit")

        assert len(valid) == 3
        assert not invalid
Ejemplo n.º 16
0
    def test_check_existence_both_valid_and_invalid_redditors(self):
        """
        Check one Redditor name expected to validate and one expected not to;
        Validation.check_existence() should return one entry in each list.
        """

        reddit = Login.create_reddit_object()
        redditors = ["spez", "sdhfgiuoh3284th9enbsprgh8-w-wher9ghwe9hw49"]

        invalid, valid = Validation.check_existence(
            redditors, reddit, "redditor"
        )

        assert len(valid) == 1
        assert len(invalid) == 1
Ejemplo n.º 17
0
    def test_check_existence_only_valid_redditors(self):
        """
        Check a single Redditor name expected to validate;
        Validation.check_existence() should return no invalid entries.
        """

        reddit = Login.create_reddit_object()
        redditors = ["spez"]

        invalid, valid = Validation.check_existence(
            redditors, reddit, "redditor"
        )

        assert len(valid) == 1
        assert not invalid
Ejemplo n.º 18
0
    def test_check_existence_only_invalid_submissions(self):
        """
        Check a single submission URL expected to fail validation;
        Validation.check_existence() should return no valid entries.
        """

        reddit = Login.create_reddit_object()
        bad_url = "https://www.reddit.com/r/heresaninvalidlinkjasdfhuwhrpguhpasdf/"
        urls = [bad_url]

        invalid, valid = Validation.check_existence(urls, reddit, "comments")

        assert not valid
        assert len(invalid) == 1
Ejemplo n.º 19
0
    def test_check_existence_only_valid_submissions(self):
        """
        Check a single submission URL expected to validate;
        Validation.check_existence() should return no invalid entries.
        """

        reddit = Login.create_reddit_object()
        good_url = "https://www.reddit.com/r/announcements/comments/mcisdf/an_update_on_the_recent_issues_surrounding_a/"
        urls = [good_url]

        invalid, valid = Validation.check_existence(urls, reddit, "comments")

        assert len(valid) == 1
        assert not invalid
Ejemplo n.º 20
0
    def test_check_existence_only_invalid_subreddits(self):
        """
        Check three Subreddit names expected to fail validation;
        Validation.check_existence() should return no valid entries.
        """

        reddit = Login.create_reddit_object()
        subreddits = [
            "shdg8h342842h3gidbsfgjdbs",
            "asdfhauhwspf8912034812hudfghb979023974ht",
            "xcvhcsxiuvbeidefgh3qw48tr324805tyasdguap;l",
        ]

        invalid, valid = Validation.check_existence(
            subreddits, reddit, "subreddit"
        )

        assert not valid
        assert len(invalid) == 3
Ejemplo n.º 21
0
    def test_validate_all_invalid_reddit_objects_force_quit(self):
        """
        Validate three Subreddit names that are all expected to fail;
        Validation.validate() is expected to raise SystemExit.
        """

        reddit = Login.create_reddit_object()
        subreddits = [
            "shdg8h342842h3gidbsfgjdbs",
            "asdfhauhwspf8912034812hudfghb979023974ht",
            "xcvhcsxiuvbeidefgh3qw48tr324805tyasdguap;l",
        ]

        try:
            Validation.validate(subreddits, reddit, "subreddit")
        except SystemExit:
            # Expected outcome: validation forced an exit.
            pass
        else:
            # Returning normally means the force quit did not happen.
            assert False
Ejemplo n.º 22
0
    def test_validate_both_valid_and_invalid_reddit_objects(self):
        """
        Validate three Subreddit names expected to pass and three expected to
        fail; Validation.validate() should return three entries in each list.
        """

        reddit = Login.create_reddit_object()

        expected_valid = ["askreddit", "wallstreetbets", "cscareerquestions"]
        expected_invalid = [
            "shdg8h342842h3gidbsfgjdbs",
            "asdfhauhwspf8912034812hudfghb979023974ht",
            "xcvhcsxiuvbeidefgh3qw48tr324805tyasdguap;l",
        ]
        subreddits = expected_valid + expected_invalid

        invalid, valid = Validation.validate(subreddits, reddit, "subreddit")

        assert len(valid) == 3
        assert len(invalid) == 3
Ejemplo n.º 23
0
    def list_submissions(parser, post_list, reddit):
        """
        Check if submissions exist, list the ones that are not found, and
        return both groups.

        Calls a method from an external module:

            Validation.existence()

        Parameters
        ----------
        parser: ArgumentParser
            argparse ArgumentParser object
        post_list: list
            List of submission URLs
        reddit: Reddit object
            Reddit instance created by PRAW API credentials

        Returns
        -------
        not_posts: list
            List of submission URLs that were not found
        posts: list
            List of valid submission URLs

        Raises
        ------
        ValueError
            If no submission passed validation
        """

        print("\nChecking if submission(s) exist...")
        logging.info("Validating submissions...")
        logging.info("")

        # NOTE(review): `s_t` is not a parameter of this method; it is
        # presumably a module-level list of scraper types - confirm.
        found, missing = Validation.existence(
            s_t[2], post_list, parser, reddit, s_t
        )

        if missing:
            warning_style = Fore.YELLOW + Style.BRIGHT

            print(warning_style + "\nThe following submissions were not found and will be skipped:")
            print(warning_style + "-" * 55)
            print(*missing, sep="\n")

            logging.warning("Failed to validate the following submissions:")
            logging.warning("%s" % missing)
            logging.warning("Skipping.")
            logging.info("")

        # Nothing left to scrape if every submission failed validation.
        if not found:
            logging.critical("ALL SUBMISSIONS FAILED VALIDATION.")
            raise ValueError

        return missing, found
Ejemplo n.º 24
0
    def list_redditors(parser, reddit, user_list):
        """
        Check if Redditors exist, list the ones who are not found, and return
        both groups.

        Calls a public method from an external module:

            Validation.existence()

        Parameters
        ----------
        parser: ArgumentParser
            argparse ArgumentParser object
        reddit: Reddit object
            Reddit instance created by PRAW API credentials
        user_list: list
            List of Redditors

        Returns
        -------
        not_users: list
            List of Redditors who were not found
        users: list
            List of valid Redditors

        Raises
        ------
        ValueError
            If no Redditor passed validation
        """

        print("\nChecking if Redditor(s) exist...")
        logging.info("Validating Redditors...")
        logging.info("")

        # NOTE(review): `s_t` is not a parameter of this method; it is
        # presumably a module-level list of scraper types - confirm.
        found, missing = Validation.existence(
            s_t[1], user_list, parser, reddit, s_t
        )

        if missing:
            warning_style = Fore.YELLOW + Style.BRIGHT

            print(warning_style + "\nThe following Redditors were not found and will be skipped:")
            print(warning_style + "-" * 59)
            print(*missing, sep="\n")

            logging.warning("Failed to validate the following Redditors:")
            logging.warning("%s" % missing)
            logging.warning("Skipping.")
            logging.info("")

        # Nothing left to scrape if every Redditor failed validation.
        if not found:
            logging.critical("ALL REDDITORS FAILED VALIDATION.")
            raise ValueError

        return missing, found
Ejemplo n.º 25
0
    def list_subreddits(parser, reddit, s_t, sub_list):
        """
        Check if Subreddits exist, list the invalid ones if applicable, and
        return both groups.

        Calls a method from an external module:

            Validation().existence()

        Parameters
        ----------
        parser: ArgumentParser
            argparse ArgumentParser object
        reddit: Reddit object
            Reddit instance created by PRAW API credentials
        s_t: list
            List of scraper types
        sub_list: list
            List of Subreddits

        Returns
        -------
        not_subs: list
            List of Subreddits that were not found
        subs: list
            List of valid Subreddits

        Raises
        ------
        ValueError
            If no Subreddit passed validation
        """

        print("\nChecking if Subreddit(s) exist...")
        logging.info("Validating Subreddits...")
        logging.info("")

        found, missing = Validation().existence(
            s_t[0], sub_list, parser, reddit, s_t
        )

        if missing:
            warning_style = Fore.YELLOW + Style.BRIGHT

            print(warning_style + "\nThe following Subreddits were not found and will be skipped:")
            print(warning_style + "-" * 60)
            print(*missing, sep="\n")

            logging.warning("Failed to validate the following Subreddits:")
            logging.warning("%s" % missing)
            logging.warning("Skipping.")
            logging.info("")

        # Nothing left to scrape if every Subreddit failed validation.
        if not found:
            logging.critical("ALL SUBREDDITS FAILED VALIDATION.")
            raise ValueError

        return missing, found
Ejemplo n.º 26
0
    def run(args, parser, reddit):
        """
        Run the submission comments scraper and write its results.

        Calls a previously defined public method:

            Write.write()

        Calls public methods from external modules:

            PRAWTitles.c_title()

            GetPRAWScrapeSettings().create_list()
            Validation.validate()
            Global.make_none_dict()
            GetPRAWScrapeSettings().get_settings()

        Parameters
        ----------
        args: Namespace
            Namespace object containing all arguments that were defined in the CLI
        parser: ArgumentParser
            argparse ArgumentParser object (not used by this method)
        reddit: Reddit object
            Reddit instance created by PRAW API credentials

        Returns
        -------
        c_master: dict
            Dictionary containing all submission comments scrape settings
        """

        PRAWTitles.c_title()

        requested_posts = GetPRAWScrapeSettings().create_list(args, "comments")
        invalid_posts, valid_posts = Validation.validate(
            requested_posts, reddit, "comments"
        )

        # Start from the validated submissions, then let get_settings() fill
        # in the dictionary.
        c_master = make_none_dict(valid_posts)
        GetPRAWScrapeSettings().get_settings(
            args, invalid_posts, c_master, "comments"
        )

        Write.write(args, c_master, reddit)

        return c_master
Ejemplo n.º 27
0
    def run(args, parser, reddit):
        """
        Run the Redditor scraper and write its results.

        Calls a previously defined public method:

            Write.write()

        Calls public methods from external modules:

            PRAWTitles.u_title()

            GetPRAWScrapeSettings().create_list()
            Validation.validate()
            Global.make_none_dict()
            GetPRAWScrapeSettings().get_settings()

        Parameters
        ----------
        args: Namespace
            Namespace object containing all arguments that were defined in the CLI
        parser: ArgumentParser
            argparse ArgumentParser object (kept for interface compatibility;
            not used by this method)
        reddit: Reddit object
            Reddit instance created by PRAW API credentials

        Returns
        -------
        u_master: dict
            Dictionary containing all Redditor scrape settings
        """

        PRAWTitles.u_title()

        user_list = GetPRAWScrapeSettings().create_list(args, "redditor")

        # Validation.validate() takes (object_list, reddit, scraper_type);
        # passing `parser` as well would shift the remaining arguments.
        not_users, users = Validation.validate(user_list, reddit, "redditor")

        # Start from the validated Redditors, then let get_settings() fill in
        # the dictionary.
        u_master = make_none_dict(users)
        GetPRAWScrapeSettings().get_settings(args, not_users, u_master,
                                             "redditor")

        Write.write(args, reddit, u_master)

        return u_master
Ejemplo n.º 28
0
    def run_urs(self):
        """
        Dispatch to the URS tool group selected by the parsed CLI arguments.

        At most one of the following groups runs; they are checked in this
        order:

            API credentials check (`check`):

                Validation.validate_user()

            Directory tree display (`tree`):

                DateTree.display_tree()

            PRAW scrapers (`subreddit`, `redditor`, `comments`, `basic`),
            preceded by a credentials validation:

                RunSubreddit.run()
                RunRedditor.run()
                RunComments.run()
                RunBasic.run()

            PRAW livestream scrapers (`live_subreddit`, `live_redditor`),
            preceded by a credentials validation:

                Livestream.stream()

            Analytical tools (`frequencies`, `wordcloud`):

                GenerateFrequencies.generate()
                GenerateWordcloud.generate()

        Nothing is run when none of these flags are set.
        """

        args = self._args

        # API credentials check.
        if args.check:
            logging.info("RUNNING API CREDENTIALS CHECK.")
            logging.info("")

            Validation.validate_user(self._parser, self._reddit)
            return

        # Visual directory tree for the value given to `tree`.
        if args.tree:
            DateTree.display_tree(args.tree)
            return

        # PRAW scrapers.
        if args.subreddit or args.redditor or args.comments or args.basic:
            Validation.validate_user(self._parser, self._reddit)

            if args.subreddit:
                RunSubreddit.run(args, self._parser, self._reddit)
            if args.redditor:
                RunRedditor.run(args, self._parser, self._reddit)
            # The basic scraper is only reached when the comments scraper was
            # not requested.
            if args.comments:
                RunComments.run(args, self._parser, self._reddit)
            elif args.basic:
                RunBasic.run(args, self._parser, self._reddit)
            return

        # PRAW livestream scrapers.
        if args.live_subreddit or args.live_redditor:
            Validation.validate_user(self._parser, self._reddit)
            Livestream.stream(args, self._reddit)
            return

        # Analytical tools.
        if args.frequencies:
            GenerateFrequencies.generate(args)
        if args.wordcloud:
            GenerateWordcloud.generate(args)