Example #1
0
    def test_export_write_json(self):
        """
        Export a nested dictionary as JSON through `Export.export()`, then
        read the written file back and check that it equals the input.

        `date` is defined outside this method and forms part of the path the
        file is read from.
        """

        expected = {
            "test_1": {"this": 1, "is": 1, "a": 1, "test": 1},
            "test_2": {"this": 2, "is": 2, "a": 2, "test": 2},
        }

        Export.export(expected, "export_write_json_test", "json", "subreddits")

        json_path = f"../scrapes/{date}/subreddits/export_write_json_test.json"
        with open(json_path, "r", encoding="utf-8") as json_file:
            loaded = json.load(json_file)

        assert loaded == expected
Example #2
0
    def test_export_write_csv(self):
        """
        Export a dictionary of columns as CSV through `Export.export()`, then
        rebuild the dictionary from the written file and compare it with the
        input.

        The first CSV row is read as the column headers; every later cell is
        converted with `int()` and appended to the column at the same position.
        """

        expected = {
            "this": [1, 2],
            "is": [3, 4],
            "a": [5, 6],
            "test": [7, 8],
        }

        Export.export(expected, "export_write_csv_test", "csv", "subreddits")

        csv_path = f"../scrapes/{date}/subreddits/export_write_csv_test.csv"
        with open(csv_path, "r", newline="", encoding="utf-8") as csv_file:
            rows = csv.reader(csv_file)
            columns = {header: [] for header in next(rows)}
            for row in rows:
                try:
                    for position, header in enumerate(columns):
                        columns[header].append(int(row[position]))
                except IndexError:
                    # A row with fewer cells than there are headers (an empty
                    # row, for instance) stops here; move on to the next row.
                    continue

        assert columns == expected
Example #3
0
    def export(data, f_type, filename):
        """
        Write the data dictionary to a JSON or CSV file.

        Delegates to one of:

            Export.write_json()
            Export.write_csv()

        Parameters
        ----------
        data: dict
            Dictionary containing frequency data
        f_type: str
            String denoting the file format. "json" selects the JSON writer;
            any other value selects the CSV writer
        filename: str
            String denoting the filename

        Returns
        -------
        None
        """

        if f_type == "json":
            Export.write_json(data, filename)
        else:
            Export.write_csv(data, filename)
Example #4
0
    def _determine_export(args, data, f_name):
        """
        Export comments in either raw or structured format.

        The status message is shown through Halo and written to the log before
        the export runs.

        Calls one of the following methods from an external module:

            Export.export()
            Export.write_structured_comments()

        Parameters
        ----------
        args: Namespace
            Namespace object containing all arguments that were defined in the
            CLI. `args.raw` selects the raw export
        data: dict
            Dictionary containing all scraped data. The value at
            data["scrape_settings"]["n_results"] is used in the status message
        f_name: str
            String denoting the filename

        Returns
        -------
        None
        """

        export_raw = bool(args.raw)

        if export_raw:
            status_template = "Exporting %s comments in raw format."
        else:
            status_template = "Exporting %s comments in structured format."

        export_status = status_template % data["scrape_settings"]["n_results"]
        Halo().info(export_status)
        logging.info(export_status)

        if export_raw:
            Export.export(data, f_name, "json", "comments")
        else:
            Export.write_structured_comments(data, f_name)
Example #5
0
    def write(reddit, u_master):
        """
        Scrape each Redditor listed in `u_master` and export the result as JSON.

        For every item in `u_master` this fetches the data, builds a filename,
        exports to the "redditors" category, and prints a success message.

        Calls a previously defined public method:

            GetInteractions.get()

        Calls public methods from external modules:

            NameFile().u_fname()
            Export.export()

        Parameters
        ----------
        reddit: Reddit object
            Reddit instance created by PRAW API credentials
        u_master: dict
            Dictionary containing all scrape settings. Each key is passed on as
            the Redditor and each value as the limit

        Returns
        -------
        None
        """

        for username, result_limit in u_master.items():
            scraped = GetInteractions.get(result_limit, reddit, username)
            target_name = NameFile().u_fname(result_limit, username)

            Export.export(scraped, target_name, "json", "redditors")

            print()
            success_text = Style.BRIGHT + Fore.GREEN + \
                "JSON file for u/%s created." % username
            Halo().succeed(success_text)
            print()
Example #6
0
    def _determine_export(args, data, f_name):
        """
        Pick the export format from the CLI arguments and export the comments.

        `eo` is defined outside this method. Its first item is used when
        `args.csv` is set and its second item otherwise (presumably "csv" and
        "json" respectively; confirm against the definition of `eo`).

        Calls a public method from an external module:

            Export.export()

        Parameters
        ----------
        args: Namespace
            Namespace object containing all arguments that were defined in the CLI
        data: dict
            Dictionary containing all scraped data
        f_name: str
            String denoting the filename

        Returns
        -------
        None
        """

        if args.csv:
            export_option = eo[0]
        else:
            export_option = eo[1]

        Export.export(data, f_name, export_option, "comments")
Example #7
0
    def test_write_structured_comments(self):
        """
        Build a three-level chain of mock comment nodes, write it with
        `Export.write_structured_comments()`, and check the JSON read back
        from the written file.
        """

        chain = []
        for text in ("test one", "test two", "test three"):
            node = MockNode(text)
            EncodeNode().encode(node)
            chain.append(node)

        first_node, second_node, third_node = chain

        # Nest the nodes: one -> two -> three.
        first_node.replies.append(second_node)
        first_node.replies[0].replies.append(third_node)

        test_nodes = [first_node]

        Export.write_structured_comments(test_nodes, "structured_comments_test")

        expected = [
            {
                'string': 'test one',
                'replies': [
                    {
                        'string': 'test two',
                        'replies': [
                            {'string': 'test three', 'replies': []}
                        ]
                    }
                ]
            }
        ]

        json_path = f"../scrapes/{date}/comments/structured_comments_test.json"
        with open(json_path, "r", encoding="utf-8") as json_file:
            loaded = json.load(json_file)

        assert loaded == expected
Example #8
0
    def test_write_csv(self):
        """
        Write a dictionary of columns with `Export.write_csv()`, rebuild the
        dictionary from the written file, and compare it with the input.

        The file is created next to the running script (`sys.path[0]`) and is
        removed afterwards, whether or not the test passes.
        """

        filename = os.path.join(sys.path[0], "test_csv_writing.csv")
        overview = {
            "this": [1, 2],
            "is": [3, 4],
            "a": [5, 6],
            "test": [7, 8]
        }

        try:
            Export.write_csv(overview, filename)

            # The csv module expects files opened with newline="" so that it
            # can handle line endings itself.
            with open(filename, "r", newline="", encoding="utf-8") as test_csv:
                reader = csv.reader(test_csv)
                # The first row holds the column headers.
                test_dict = {header: [] for header in next(reader)}
                for row in reader:
                    for row_index, key in enumerate(test_dict):
                        test_dict[key].append(int(row[row_index]))

            assert test_dict == overview
        finally:
            # Clean up even when the write, the read, or the assertion fails,
            # so a failed run does not leave the file behind.
            if os.path.exists(filename):
                os.remove(filename)
Example #9
0
    def _write(args, cat_i, data, each_sub, sub):
        """
        Export scraped submissions to a file and report success.

        The format is CSV when `args.csv` is set and JSON otherwise.

        Calls methods from external modules:

            NameFile().r_fname()
            Export.export()

        Parameters
        ----------
        args: Namespace
            Namespace object containing all arguments that were defined in the CLI
        cat_i: str
            String denoting n_results returned or keywords searched for
        data: dict
            Dictionary containing scraped Subreddit submission data
        each_sub: list
            List of Subreddit scraping settings
        sub: str
            String denoting the Subreddit name

        Returns
        -------
        None
        """

        f_name = NameFile().r_fname(args, cat_i, each_sub, sub)

        if args.csv:
            export_option = "csv"
        else:
            export_option = "json"

        Export.export(data, f_name, export_option, "subreddits")

        print()
        spinner = Halo(
            color="green",
            text=Style.BRIGHT + Fore.GREEN +
            f"{export_option.upper()} file for r/{sub} created."
        )
        spinner.succeed()
        print()
Example #10
0
    def test_export_write_csv(self):
        """
        Export a dictionary of columns as CSV through `Export.export()`, then
        rebuild the dictionary from the written file and compare it with the
        input.

        `date` is defined outside this method and forms part of the path the
        file is read from.
        """

        data = {
            "this": [1, 2],
            "is": [3, 4],
            "a": [5, 6],
            "test": [7, 8]
        }

        f_name = "export_write_csv_test"
        f_type = "csv"
        scrape = "subreddits"

        Export.export(data, f_name, f_type, scrape)

        csv_path = "../scrapes/%s/subreddits/export_write_csv_test.csv" % date
        # The csv module expects files opened with newline="" so that it can
        # handle line endings itself; the encoding is explicit so the read does
        # not depend on the platform default.
        with open(csv_path, "r", newline="", encoding="utf-8") as test_csv:
            reader = csv.reader(test_csv)
            # The first row holds the column headers.
            test_dict = {header: [] for header in next(reader)}
            for row in reader:
                for row_index, key in enumerate(test_dict):
                    test_dict[key].append(int(row[row_index]))

        assert test_dict == data
Example #11
0
    def _determine_export(args, data, f_name):
        """
        Pick the export format from the CLI arguments and export the
        Subreddit data.

        `eo` is defined outside this method. Its first item is used when
        `args.csv` is set and its second item otherwise (presumably "csv" and
        "json" respectively; confirm against the definition of `eo`).

        Parameters
        ----------
        args: Namespace
            Namespace object containing all arguments that were defined in the CLI
        data: dict
            Dictionary containing scraped Subreddit submission data
        f_name: str
            String denoting the filename

        Returns
        -------
        None
        """

        if args.csv:
            export_option = eo[0]
        else:
            export_option = eo[1]

        Export.export(data, f_name, export_option, "subreddits")
Example #12
0
    def test_write_json(self):
        """
        Write a nested dictionary with `Export.write_json()`, read the file
        back, and check that it equals the input.

        The file is created next to the running script (`sys.path[0]`) and is
        removed afterwards, whether or not the test passes.
        """

        filename = os.path.join(sys.path[0], "test_json_writing.json")
        overview = {
            "test_1": {
                "this": 1,
                "is": 1,
                "a": 1,
                "test": 1
            },
            "test_2": {
                "this": 2,
                "is": 2,
                "a": 2,
                "test": 2
            }
        }

        try:
            Export.write_json(overview, filename)

            with open(filename, "r", encoding="utf-8") as test_json:
                test_dict = json.load(test_json)

            assert test_dict == overview
        finally:
            # Clean up even when the write, the read, or the assertion fails,
            # so a failed run does not leave the file behind.
            if os.path.exists(filename):
                os.remove(filename)
Example #13
0
    def test_get_filename_extension_returns_comments_json(self):
        """
        Check the path `Export._get_filename_extension()` returns for a JSON
        file in the "comments" category.
        """

        f_name = "test"

        actual = Export._get_filename_extension(f_name, "json", "comments")
        expected = f"../scrapes/{date}/comments/{f_name}.json"

        assert actual == expected
Example #14
0
    def test_get_filename_extension_returns_redditors_json(self):
        """
        Check the path `Export._get_filename_extension()` returns for a JSON
        file in the "redditors" category.
        """

        f_name = "test"

        actual = Export._get_filename_extension(f_name, "json", "redditors")
        expected = f"../scrapes/{date}/redditors/{f_name}.json"

        assert actual == expected
Example #15
0
    def test_get_filename_extension_returns_subreddits_json(self):
        """
        Check the path `Export._get_filename_extension()` returns for a JSON
        file in the "subreddits" category.
        """

        f_name = "test"

        actual = Export._get_filename_extension(f_name, "json", "subreddits")
        expected = "../scrapes/%s/subreddits/%s.json" % (date, f_name)

        assert actual == expected
Example #16
0
    def test_get_filename_extension_returns_subreddits_csv(self):
        """
        Check the path `Export._get_filename_extension()` returns for a CSV
        file in the "subreddits" category.
        """

        f_name = "test"

        actual = Export._get_filename_extension(f_name, "csv", "subreddits")
        expected = f"../scrapes/{date}/subreddits/{f_name}.csv"

        assert actual == expected
Example #17
0
    def test_get_filename_extension_returns_comments_csv(self):
        """
        Check the path `Export._get_filename_extension()` returns for a CSV
        file in the "comments" category.
        """

        f_name = "test"

        actual = Export._get_filename_extension(f_name, "csv", "comments")
        expected = "../scrapes/%s/comments/%s.csv" % (date, f_name)

        assert actual == expected