def test_export_write_json(self):
    """
    Round-trip a nested dictionary through Export.export() as JSON.

    The dictionary is exported with the "json" file type and the
    "subreddits" scrape type, then the file at the path built from the
    module-level `date` is loaded back and compared with the input.
    """

    expected = {
        "test_1": {"this": 1, "is": 1, "a": 1, "test": 1},
        "test_2": {"this": 2, "is": 2, "a": 2, "test": 2},
    }

    Export.export(expected, "export_write_json_test", "json", "subreddits")

    exported_path = f"../scrapes/{date}/subreddits/export_write_json_test.json"
    with open(exported_path, "r", encoding="utf-8") as exported_file:
        loaded = json.load(exported_file)

    assert loaded == expected
def test_export_write_csv(self):
    """
    Round-trip a column-oriented dictionary through Export.export() as CSV.

    The dictionary is exported with the "csv" file type and the
    "subreddits" scrape type. The resulting file is read back with
    csv.reader, rebuilt into a header -> list-of-ints mapping, and compared
    with the input.
    """

    expected = {
        "this": [1, 2],
        "is": [3, 4],
        "a": [5, 6],
        "test": [7, 8],
    }

    Export.export(expected, "export_write_csv_test", "csv", "subreddits")

    exported_path = f"../scrapes/{date}/subreddits/export_write_csv_test.csv"
    with open(exported_path, "r", newline="", encoding="utf-8") as exported_file:
        rows = csv.reader(exported_file)

        # The first row holds the column headers; each starts with no values.
        columns = {header: [] for header in next(rows)}

        for row in rows:
            # zip() stops at the shorter of the two, so a blank or short row
            # contributes only the cells it actually has and is then left
            # behind -- the same outcome as abandoning the row on IndexError.
            for header, cell in zip(columns, row):
                columns[header].append(int(cell))

    assert columns == expected
def export(data, f_type, filename):
    """
    Hand a data dictionary to the JSON or the CSV writer.

    Calls public methods found in external modules:

        Export.write_json()
        Export.write_csv()

    Parameters
    ----------
    data: dict
        Dictionary to write (presumably frequency data -- confirm against
        callers)
    f_type: str
        String denoting the file format; "json" selects the JSON writer and
        any other value selects the CSV writer
    filename: str
        String denoting the filename

    Returns
    -------
    None
    """

    if f_type == "json":
        Export.write_json(data, filename)
    else:
        Export.write_csv(data, filename)
def _determine_export(args, data, f_name):
    """
    Announce and run the comment export in raw or structured form.

    Calls public methods from external modules:

        Export.export()
        Export.write_structured_comments()

    Parameters
    ----------
    args: Namespace
        Namespace object containing all arguments that were defined in the
        CLI; only `args.raw` is read here
    data: dict
        Dictionary containing all scraped data; the value at
        data["scrape_settings"]["n_results"] is used in the status message
    f_name: str
        String denoting the filename

    Returns
    -------
    None
    """

    raw = args.raw
    n_results = data["scrape_settings"]["n_results"]

    # Both paths announce and log identically; only the wording differs.
    if raw:
        export_status = "Exporting %s comments in raw format." % n_results
    else:
        export_status = "Exporting %s comments in structured format." % n_results

    Halo().info(export_status)
    logging.info(export_status)

    if raw:
        Export.export(data, f_name, "json", "comments")
    else:
        Export.write_structured_comments(data, f_name)
def write(reddit, u_master):
    """
    Fetch each Redditor's interactions and export them to a JSON file.

    Calls a previously defined public method:

        GetInteractions.get()

    Calls public methods from external modules:

        NameFile().u_fname()
        Export.export()

    Parameters
    ----------
    reddit: Reddit object
        Reddit instance created by PRAW API credentials
    u_master: dict
        Dictionary containing all scrape settings; iterated as
        (redditor, limit) pairs

    Returns
    -------
    None
    """

    for redditor, limit in u_master.items():
        interactions = GetInteractions.get(limit, reddit, redditor)
        filename = NameFile().u_fname(limit, redditor)

        # This path always exports JSON under the "redditors" scrape type.
        Export.export(interactions, filename, "json", "redditors")

        print()
        success_message = Style.BRIGHT + Fore.GREEN + "JSON file for u/%s created." % redditor
        Halo().succeed(success_message)
        print()
def _determine_export(args, data, f_name):
    """
    Pick the export format from the CLI arguments and export the comments.

    Calls a public method from an external module:

        Export.export()

    Parameters
    ----------
    args: Namespace
        Namespace object containing all arguments that were defined in the
        CLI; only `args.csv` is read here
    data: dict
        Dictionary containing all scraped data
    f_name: str
        String denoting the filename

    Returns
    -------
    None
    """

    # `eo` is defined outside this function; presumably eo[0] is the CSV
    # option and eo[1] the JSON option -- confirm against its definition.
    if args.csv:
        export_option = eo[0]
    else:
        export_option = eo[1]

    Export.export(data, f_name, export_option, "comments")
def test_write_structured_comments(self):
    """
    Write a three-level reply chain with Export.write_structured_comments()
    and verify the JSON file it produces.

    Three MockNode objects are created and each is passed through
    EncodeNode().encode(). They are then nested as one -> two -> three via
    their `replies` lists, and the single root node is written out.
    """

    # Create and encode each node in turn, in the order one, two, three.
    chain = []
    for text in ("test one", "test two", "test three"):
        node = MockNode(text)
        EncodeNode().encode(node)
        chain.append(node)

    root, child, grandchild = chain

    root.replies.append(child)
    # Attach the deepest node through the root's first reply.
    root.replies[0].replies.append(grandchild)

    test_nodes = [root]
    Export.write_structured_comments(test_nodes, "structured_comments_test")

    written_path = f"../scrapes/{date}/comments/structured_comments_test.json"
    with open(written_path, "r", encoding="utf-8") as written_file:
        loaded = json.load(written_file)

    expected = [
        {
            "string": "test one",
            "replies": [
                {
                    "string": "test two",
                    "replies": [
                        {
                            "string": "test three",
                            "replies": [],
                        },
                    ],
                },
            ],
        },
    ]

    assert loaded == expected
def test_write_csv(self):
    """
    Write a column-oriented dictionary with Export.write_csv() and verify
    that reading the file back yields the same dictionary.

    The file is created next to the running script (sys.path[0]) and is
    removed afterwards, including when the write, the parse, or the
    assertion fails, so a failing run does not leave the file behind.
    """

    filename = os.path.join(sys.path[0], "test_csv_writing.csv")
    overview = {
        "this": [1, 2],
        "is": [3, 4],
        "a": [5, 6],
        "test": [7, 8],
    }

    try:
        Export.write_csv(overview, filename)

        # newline="" lets the csv module do its own newline handling, as its
        # documentation recommends. The fixture is ASCII-only, so decoding
        # as UTF-8 pins the encoding without depending on the locale.
        with open(filename, "r", newline="", encoding="utf-8") as test_csv:
            reader = csv.reader(test_csv)

            # The first row holds the headers; the remaining rows hold one
            # integer per column.
            test_dict = {header: [] for header in next(reader)}
            for row in reader:
                for row_index, key in enumerate(test_dict):
                    test_dict[key].append(int(row[row_index]))

        assert test_dict == overview
    finally:
        # Guard the removal so that a failed write does not raise a second
        # error that hides the original one.
        if os.path.exists(filename):
            os.remove(filename)
def _write(args, cat_i, data, each_sub, sub):
    """
    Export scraped Subreddit submissions and report the created file.

    Calls methods from external modules:

        NameFile().r_fname()
        Export.export()

    Parameters
    ----------
    args: Namespace
        Namespace object containing all arguments that were defined in the
        CLI; `args.csv` selects CSV output, otherwise JSON is written
    cat_i: str
        String denoting n_results returned or keywords searched for
    data: dict
        Dictionary containing scraped Subreddit submission data
    each_sub: list
        List of Subreddit scraping settings
    sub: str
        String denoting the Subreddit name

    Returns
    -------
    None
    """

    f_name = NameFile().r_fname(args, cat_i, each_sub, sub)

    if args.csv:
        export_option = "csv"
    else:
        export_option = "json"

    Export.export(data, f_name, export_option, "subreddits")

    print()
    success_text = Style.BRIGHT + Fore.GREEN + f"{export_option.upper()} file for r/{sub} created."
    spinner = Halo(color="green", text=success_text)
    spinner.succeed()
    print()
def test_export_write_csv(self):
    """
    Round-trip a column-oriented dictionary through Export.export() as CSV.

    The dictionary is exported with the "csv" file type and the
    "subreddits" scrape type. The file at the path built from the
    module-level `date` is then read back into a header -> list-of-ints
    mapping and compared with the input.
    """

    data = {
        "this": [1, 2],
        "is": [3, 4],
        "a": [5, 6],
        "test": [7, 8],
    }
    f_name = "export_write_csv_test"
    f_type = "csv"
    scrape = "subreddits"

    Export.export(data, f_name, f_type, scrape)

    # newline="" lets the csv module do its own newline handling, as its
    # documentation recommends. The fixture is ASCII-only, so decoding as
    # UTF-8 pins the encoding without depending on the locale.
    csv_path = f"../scrapes/{date}/subreddits/export_write_csv_test.csv"
    with open(csv_path, "r", newline="", encoding="utf-8") as test_csv:
        reader = csv.reader(test_csv)

        # The first row holds the headers; the remaining rows hold one
        # integer per column.
        test_dict = {header: [] for header in next(reader)}
        for row in reader:
            for row_index, key in enumerate(test_dict):
                test_dict[key].append(int(row[row_index]))

    assert test_dict == data
def _determine_export(args, data, f_name):
    """
    Pick the export format from the CLI arguments and export the
    Subreddit data.

    Calls a public method from an external module:

        Export.export()

    Parameters
    ----------
    args: Namespace
        Namespace object containing all arguments that were defined in the
        CLI; only `args.csv` is read here
    data: dict
        Dictionary containing scraped Subreddit submission data
    f_name: str
        String denoting the filename

    Returns
    -------
    None
    """

    # `eo` is defined outside this function; presumably eo[0] is the CSV
    # option and eo[1] the JSON option -- confirm against its definition.
    if args.csv:
        export_option = eo[0]
    else:
        export_option = eo[1]

    Export.export(data, f_name, export_option, "subreddits")
def test_write_json(self):
    """
    Write a nested dictionary with Export.write_json() and verify that
    loading the file back yields the same dictionary.

    The file is created next to the running script (sys.path[0]) and is
    removed afterwards, including when the write, the load, or the
    assertion fails, so a failing run does not leave the file behind.
    """

    filename = os.path.join(sys.path[0], "test_json_writing.json")
    overview = {
        "test_1": {"this": 1, "is": 1, "a": 1, "test": 1},
        "test_2": {"this": 2, "is": 2, "a": 2, "test": 2},
    }

    try:
        Export.write_json(overview, filename)

        with open(filename, "r", encoding="utf-8") as test_json:
            test_dict = json.load(test_json)

        assert test_dict == overview
    finally:
        # Guard the removal so that a failed write does not raise a second
        # error that hides the original one.
        if os.path.exists(filename):
            os.remove(filename)
def test_get_filename_extension_returns_comments_json(self):
    """
    Export._get_filename_extension() should return the dated "comments"
    path ending in ".json" for a JSON export.
    """

    f_name = "test"

    actual = Export._get_filename_extension(f_name, "json", "comments")
    expected = f"../scrapes/{date}/comments/{f_name}.json"

    assert actual == expected
def test_get_filename_extension_returns_redditors_json(self):
    """
    Export._get_filename_extension() should return the dated "redditors"
    path ending in ".json" for a JSON export.
    """

    f_name = "test"

    actual = Export._get_filename_extension(f_name, "json", "redditors")
    expected = f"../scrapes/{date}/redditors/{f_name}.json"

    assert actual == expected
def test_get_filename_extension_returns_subreddits_json(self):
    """
    Export._get_filename_extension() should return the dated "subreddits"
    path ending in ".json" for a JSON export.
    """

    f_name = "test"

    actual = Export._get_filename_extension(f_name, "json", "subreddits")
    expected = "../scrapes/%s/subreddits/%s.json" % (date, f_name)

    assert actual == expected
def test_get_filename_extension_returns_subreddits_csv(self):
    """
    Export._get_filename_extension() should return the dated "subreddits"
    path ending in ".csv" for a CSV export.
    """

    f_name = "test"

    actual = Export._get_filename_extension(f_name, "csv", "subreddits")
    expected = f"../scrapes/{date}/subreddits/{f_name}.csv"

    assert actual == expected
def test_get_filename_extension_returns_comments_csv(self):
    """
    Export._get_filename_extension() should return the dated "comments"
    path ending in ".csv" for a CSV export.
    """

    f_name = "test"

    actual = Export._get_filename_extension(f_name, "csv", "comments")
    expected = "../scrapes/%s/comments/%s.csv" % (date, f_name)

    assert actual == expected