def _make_livestream_dir(split_stream_info): """ Make the `livestream` directory within the `scrapes/[DATE]` directory. Calls public methods from an external module: InitializeDirectory.create_dirs() Parameters ---------- split_stream_info: list List containing stream information Returns ------- stream_directory: str String denoting the path to the directory in which the stream is saved """ if split_stream_info[0] == "r": sub_directory = "subreddits" elif split_stream_info[0] == "u": sub_directory = "redditors" stream_directory = f"../scrapes/{date}/livestream/{sub_directory}" InitializeDirectory.create_dirs(stream_directory) return stream_directory
def name_and_create_dir(self, args, file):
    """
    Pick the export format, name the new file, and create the `frequencies`
    analytics directory.

    Calls public methods from external modules:

        GetPath.name_file()
        InitializeDirectory.make_analytics_directory()

    Parameters
    ----------
    args: Namespace
        Namespace object containing all arguments used in the CLI. Only
        `args.csv` is read here
    file: list
        List containing scrape files and file formats to generate wordcloud
        with. Only the first element is read here

    Returns
    -------
    f_type: str
        String denoting the file format ("csv" if `args.csv` is truthy,
        otherwise "json")
    filename: str
        String denoting the filename
    """

    if args.csv:
        f_type = "csv"
    else:
        f_type = "json"

    date_dir, filename = GetPath.name_file(f_type, file[0], "frequencies")
    InitializeDirectory.make_analytics_directory(date_dir, "frequencies")

    return f_type, filename
def export(data, f_name, f_type, scrape):
    """
    Write scrape data to disk as either JSON or CSV.

    Calls external module methods and previously defined private and public
    methods:

        InitializeDirectory.make_type_directory()
        Export._get_filename_extension()
        Export.write_json()
        Export.write_csv()

    Parameters
    ----------
    data: dict
        Dictionary of scrape data
    f_name: str
        Filename
    f_type: str
        File type. JSON is written when this equals `eo[1]` (presumably the
        JSON option - confirm against where `eo` is defined); CSV is written
        for any other value
    scrape: str
        Scrape type ("subreddits", "redditors", or "comments")

    Returns
    -------
    None
    """

    ### The scrape-type directory is created before the filename is resolved.
    InitializeDirectory.make_type_directory(scrape)
    filename = Export._get_filename_extension(f_name, f_type, scrape)

    if f_type == eo[1]:
        Export.write_json(data, filename)
    else:
        Export.write_csv(data, filename)
def write_structured_comments(data, f_name):
    """
    Write structured comments to a JSON file, serializing with the
    `EncodeNode` encoder class passed through the `cls` parameter of
    `json.dump()`.

    Calls a method from an external module:

        InitializeDirectory.create_dirs()

    Parameters
    ----------
    data: dict
        Dictionary of scrape data
    f_name: str
        String denoting the filename

    Returns
    -------
    None
    """

    filename = Export._get_filename_extension(f_name, "json", "comments")

    ### Everything before the last "/" is the directory that has to exist.
    parent_directory = filename.rpartition("/")[0]
    InitializeDirectory.create_dirs(parent_directory)

    with open(filename, "w", encoding = "utf-8") as results:
        json.dump(data, results, indent = 4, cls = EncodeNode)
def test_make_directory(self):
    """
    Check that `InitializeDirectory.make_directory()` leaves both `../scrapes`
    and `../scrapes/` + `date` on disk as directories.
    """

    InitializeDirectory.make_directory()

    assert os.path.isdir("../scrapes") and os.path.isdir("../scrapes/" + date)
def test_make_analytics_directory(self):
    """
    Check that `InitializeDirectory.make_analytics_directory()` creates the
    `analytics/wordcloud` directory under `../scrapes/` + `date`.
    """

    tool_type = "wordcloud"
    InitializeDirectory.make_analytics_directory(date, tool_type)

    expected_directory = "../scrapes/%s/analytics/%s" % (date, tool_type)
    assert os.path.isdir(expected_directory)
def test_make_type_directory(self):
    """
    Check that `InitializeDirectory.make_type_directory()` creates the
    `subreddit` directory under `../scrapes/` + `date`.
    """

    scrape = "subreddit"
    InitializeDirectory.make_type_directory(scrape)

    expected_directory = "../scrapes/%s/%s" % (date, scrape)
    assert os.path.isdir(expected_directory)
def test_create_dirs_method(self):
    """
    Check that `InitializeDirectory.create_dirs()` creates a nested directory
    path. The created directories are not removed afterwards.
    """

    test_path = "../scrapes/test_dir/another_test_dir/a_final_dir"
    InitializeDirectory.create_dirs(test_path)

    assert os.path.isdir(test_path)
def test_create(self):
    """
    Check that `InitializeDirectory._create()` creates the requested
    directory, then remove that directory again.
    """

    destination = "../test_dir"
    InitializeDirectory._create(destination)

    ### Fail right away if nothing was created; otherwise clean up.
    assert os.path.isdir(destination)
    os.rmdir(destination)
class LogMain():
    """
    Decorator for logging URS runtime. Also handles KeyboardInterrupt and adds
    the event to the log if applicable.

    Defining this class has side effects: the scrapes directory is created
    through `InitializeDirectory.make_directory()` and the root logger is
    configured to write to `urs.log` inside `DIR_PATH`.
    """

    ### Makes directory in which the log and scraped files will be stored.
    InitializeDirectory.make_directory()

    ### Set directory path and log format.
    DIR_PATH = "../scrapes/%s" % date
    LOG_FORMAT = "[%(asctime)s] [%(levelname)s]: %(message)s"

    ### Configure logging settings.
    logging.basicConfig(
        filename = DIR_PATH + "/urs.log",
        format = LOG_FORMAT,
        level = logging.INFO
    )

    @staticmethod
    def master_timer(function):
        """
        Wrapper for logging the amount of time it took to execute main().
        Handle KeyboardInterrupt if user cancels URS.

        Parameters
        ----------
        function: function()
            Run method within the wrapper

        Exceptions
        ----------
        KeyboardInterrupt:
            Caught if user cancels URS. The abort is printed and logged, then
            SystemExit is raised to stop the program

        Returns
        -------
        wrapper: function()
            Return the wrapper method that runs the method passed into the
            decorator. The wrapper forwards positional and keyword arguments
            and returns whatever the wrapped method returns
        """

        ### Imported locally so this block does not rely on a file-level import.
        import functools

        ### Preserve the wrapped method's name and docstring.
        @functools.wraps(function)
        def wrapper(*args, **kwargs):
            logging.info("INITIALIZING URS.")
            logging.info("")

            start = time.time()

            try:
                result = function(*args, **kwargs)
            except KeyboardInterrupt:
                print(Style.BRIGHT + Fore.RED + "\n\nURS ABORTED BY USER.\n")
                logging.warning("")
                logging.warning("URS ABORTED BY USER.\n")
                ### `quit()` is injected by the `site` module and is not
                ### always available; raising SystemExit works everywhere.
                raise SystemExit

            logging.info("URS COMPLETED IN %.2f SECONDS.\n", time.time() - start)

            return result

        return wrapper
def get_scrape_type(scrape_file, tool):
    """
    Get the name of the scrape-specific directory in which the data is stored
    and create the directories within the `analytics` folder.

    Calls a method from an external module:

        InitializeDirectory.create_dirs()

    Parameters
    ----------
    scrape_file: str
        String denoting the filepath. The path has to contain a `scrapes`
        component followed by at least two more components
    tool: str
        String denoting the tool type

    Exceptions
    ----------
    TypeError:
        Raised if the file is not JSON or if the file resides in the `analytics`
        directory
    ValueError:
        Raised if the path does not contain a `scrapes` component
    IndexError:
        Raised if fewer than two components follow `scrapes`

    Returns
    -------
    analytics_dir: str
        String denoting the path to the directory in which the analytical data
        will be written
    scrape_dir: str
        String denoting the scrape-specific directory
    """

    file_path = Path(scrape_file)
    path_parts = list(file_path.parts)

    ### The scrape-specific directory is the second component after `scrapes`.
    scrape_index = path_parts.index("scrapes") + 2
    scrape_dir = path_parts[scrape_index]

    ### Checking the suffix handles filenames without a dot and filenames that
    ### contain more than one dot.
    if file_path.suffix != ".json":
        raise TypeError("Scrape file is not a JSON file: %s" % scrape_file)
    if scrape_dir == "analytics":
        raise TypeError(
            "Scrape file resides in the `analytics` directory: %s" % scrape_file
        )

    ### Insert `analytics/[TOOL]` ahead of the scrape-specific directories and
    ### drop the filename.
    split_analytics_dir = \
        path_parts[:scrape_index] + \
        ["analytics", tool] + \
        path_parts[scrape_index:-1]

    analytics_dir = "/".join(split_analytics_dir)
    InitializeDirectory.create_dirs(analytics_dir)

    return analytics_dir, scrape_dir
def write_structured_comments(data, f_name):
    """
    Write structured comments to a JSON file, serializing with the
    `EncodeNode` encoder class passed through the `cls` parameter of
    `json.dump()`.

    Calls a method from an external module and a previously defined private
    method:

        InitializeDirectory.make_type_directory()
        Export._get_filename_extension()

    Parameters
    ----------
    data: dict
        Dictionary of scrape data
    f_name: str
        String denoting the filename

    Returns
    -------
    None
    """

    scrape_type = "comments"

    InitializeDirectory.make_type_directory(scrape_type)
    filename = Export._get_filename_extension(f_name, "json", scrape_type)

    with open(filename, "w", encoding = "utf-8") as results:
        json.dump(data, results, indent = 4, cls = EncodeNode)
def save_wordcloud(self, file, wc):
    """
    Export the wordcloud to a file while displaying an export status.

    Calls public methods from external modules:

        GetPath.name_file()
        InitializeDirectory.make_analytics_directory()

    Parameters
    ----------
    file: list
        List containing scrape files and file formats to generate wordcloud
        with. The first two elements are read here
    wc: WordCloud
        Wordcloud instance

    Returns
    -------
    filename: str
        String denoting the filename for the exported wordcloud
    """

    date_dir, filename = GetPath.name_file(file[1], file[0], "wordclouds")

    ### Only the part of the path from `scrapes` onward goes in the message.
    path_parts = filename.split("/")
    displayed_path = "/".join(path_parts[path_parts.index("scrapes"):])

    export_status = Status(
        Style.BRIGHT + Fore.GREEN + "Wordcloud exported to %s." % displayed_path,
        "Exporting wordcloud.",
        "white"
    )

    export_status.start()

    InitializeDirectory.make_analytics_directory(date_dir, "wordclouds")
    wc.to_file(filename)

    export_status.succeed()
    print()

    return filename
def export(data, f_name, f_type, scrape):
    """
    Write scrape data to disk as either JSON or CSV.

    Calls a method from an external module:

        InitializeDirectory.create_dirs()

    Calls previously defined private and public methods:

        Export._get_filename_extension()
        Export.write_json()
        Export.write_csv()

    Parameters
    ----------
    data: dict
        Dictionary of scrape data
    f_name: str
        Filename
    f_type: str
        File type. JSON is written when this is "json"; CSV is written for any
        other value
    scrape: str
        Scrape type ("subreddits", "redditors", or "comments")

    Returns
    -------
    None
    """

    filename = Export._get_filename_extension(f_name, f_type, scrape)

    ### Everything before the last "/" is the directory that has to exist.
    parent_directory = filename.rpartition("/")[0]
    InitializeDirectory.create_dirs(parent_directory)

    write = Export.write_json if f_type == "json" else Export.write_csv
    write(data, filename)