def test_locally(local_folder_path, config_fname):
    """Check urls in a local folder, reading test settings from a config file."""
    # load the configuration and work from its DEFAULT section
    parser = configparser.ConfigParser()
    parser.read(config_fname)
    defaults = parser["DEFAULT"]

    # comma-delimited config entries become lists
    extensions = defaults["file_types_test_values"].split(",")
    urls_ok = defaults["white_listed_test_urls"].split(",")
    patterns_ok = defaults["white_listed__test_patterns"].split(",")

    # gather candidate files under the folder, then run the checker over them
    paths = get_file_paths(local_folder_path, extensions)
    UrlChecker(print_all=True).run(
        file_paths=paths,
        white_listed_urls=urls_ok,
        white_listed_patterns=patterns_ok,
        retry_count=1,
    )
    print("Done.")
def __init__(
    self,
    path: str = None,
    file_types: List[str] = None,
    exclude_files: List[str] = None,
    print_all: bool = True,
    include_patterns: List[str] = None,
):
    """
    Create a url checker configured with file-selection preferences.

    Args:
        - path (str) : root folder to scan. When not defined, no file_paths
          are parsed.
        - file_types (list) : file extensions to scan for links
          (defaults to .py and .md).
        - print_all (bool) : whether to print every checked file name or
          only the ones containing urls.
        - exclude_files (list) : file names and patterns to skip.
        - include_patterns (list) : file names and patterns to check.
    """
    # Per-category result sets, plus a per-file lookup of UrlCheck objects
    self.results = {
        "passed": set(),
        "failed": set(),
        "excluded": set(),
    }  # type: Dict[str, set]
    self.checks = {}  # type: Dict[str, Dict]

    # Remember run parameters, normalizing None lists to empty lists
    self.print_all = print_all
    self.path = path
    self.exclude_files = exclude_files or []
    self.include_patterns = include_patterns or []
    self.file_types = file_types or [".py", ".md"]
    self.file_paths = []

    # Only collect file paths when a root folder was given
    if path:
        # Bail out immediately if the folder does not exist
        if not os.path.exists(path):
            sys.exit("%s does not exist." % path)
        self.file_paths = fileproc.get_file_paths(
            base_path=path,
            file_types=self.file_types,
            exclude_files=self.exclude_files,
            include_patterns=self.include_patterns,
        )
def __init__(
    self,
    path=None,
    file_types=None,
    white_listed_files=None,
    print_all=True,
    include_patterns=None,
):
    """
    Create a url checker configured with file-selection and white-listing
    preferences.

    Args:
        - path (str) : root folder to scan. When not defined, no file_paths
          are parsed.
        - file_types (list) : file extensions to scan for links
          (defaults to .py and .md).
        - print_all (str) : whether to print every checked file name or
          only the ones containing urls.
        - white_listed_files (list) : white-listed file names and patterns.
        - include_patterns (list) : file names and patterns to check.
    """
    # Per-category result sets, plus a per-file lookup of UrlCheck objects
    self.results = {"passed": set(), "failed": set(), "white_listed": set()}
    self.checks = {}

    # Remember run parameters, normalizing None lists to empty lists
    self.print_all = print_all
    self.path = path
    self.white_listed_files = white_listed_files or []
    self.include_patterns = include_patterns or []
    self.file_types = file_types or [".py", ".md"]
    self.file_paths = []

    # Only collect file paths when a root folder was given
    if path:
        # Bail out immediately if the folder does not exist
        if not os.path.exists(path):
            sys.exit("%s does not exist." % path)
        self.file_paths = fileproc.get_file_paths(
            include_patterns=self.include_patterns,
            base_path=path,
            file_types=self.file_types,
            white_listed_files=self.white_listed_files,
        )
def test_get_file_paths(base_path, file_types):
    """
    Collect file paths under a directory tree and compare them to the two
    acceptable orderings (directory listing order is platform dependent).

    Args:
        base_path (str) : base path.
        file_types (list) : list of file extensions to accept.
    """
    found = get_file_paths(base_path, file_types)

    md = "tests/test_files/sample_test_file.md"
    py = "tests/test_files/sample_test_file.py"

    # either listing order of the two sample files is acceptable
    assert found in ([md, py], [py, md])
def test_check_run_save(tmp_path, retry_count):
    """
    End-to-end test: clone the test repo, run the url checker, save the
    results to csv, and validate the saved content in both relative-path
    and full-path modes.

    Fixes: removed unused locals (`force_pass`, the unused `check_results`
    assignment) and initialized `root` before the loop so the final
    startswith check cannot hit an undefined name.

    Args:
        tmp_path : pytest-provided temporary directory for the csv output.
        retry_count (int) : retry count passed through to UrlChecker.run.
    """
    # init vars
    git_path = "https://github.com/urlstechie/urlchecker-test-repo"
    file_types = [".py", ".md"]
    print_all = True
    white_listed_urls = [
        "https://superkogito.github.io/figures/fig2.html",
        "https://superkogito.github.io/figures/fig4.html",
    ]
    white_listed_patterns = ["https://superkogito.github.io/tables"]
    timeout = 1

    # clone repo and restrict the scan to the test_files subfolder
    base_path = clone_repo(git_path)
    base_path = os.path.join(base_path, "test_files")
    file_paths = get_file_paths(base_path, file_types)

    # check repo urls
    checker = UrlChecker(print_all=print_all)
    checker.run(
        file_paths=file_paths,
        white_listed_urls=white_listed_urls,
        white_listed_patterns=white_listed_patterns,
        retry_count=retry_count,
        timeout=timeout,
    )

    # Saving must create the output csv exactly once
    output_file = os.path.join(str(tmp_path), "results.csv")
    assert not os.path.exists(output_file)
    saved_file = checker.save_results(output_file)
    assert os.path.exists(output_file)

    # Read in output file
    with open(saved_file, "r") as filey:
        lines = filey.readlines()

    # Header line has three items
    assert lines[0] == "URL,RESULT,FILENAME\n"

    # Relative-path save: each row is url,result,filename with sane values;
    # remember the filename root so the full-path save can be compared below
    root = None
    for line in lines[1:]:
        url, result, filename = line.split(",")
        root = filename.split("/")[0]
        assert url.startswith("http")
        assert result in ["passed", "failed"]
        assert re.search("(.py|.md)$", filename)

    # Save again with full paths; filenames must no longer start at the
    # relative root seen above
    saved_file = checker.save_results(output_file, relative_paths=False)

    # Read in output file
    with open(saved_file, "r") as filey:
        lines = filey.readlines()

    for line in lines[1:]:
        url, result, filename = line.split(",")
        assert not filename.startswith(root)