Example #1
0
def test_locally(local_folder_path, config_fname):
    """
    Run the url checker over a local folder using settings from a config file.

    Args:
        local_folder_path (str) : folder whose files are scanned for urls.
        config_fname      (str) : config file handed to configparser.
    """
    # load the settings; every value used here is read from the DEFAULT section
    parser = configparser.ConfigParser()
    parser.read(config_fname)
    defaults = parser["DEFAULT"]

    # comma-separated config entries are turned into plain lists
    extensions = defaults["file_types_test_values"].split(",")
    skipped_urls = defaults["white_listed_test_urls"].split(",")
    skipped_patterns = defaults["white_listed__test_patterns"].split(",")

    # collect the files to scan
    paths_to_scan = get_file_paths(local_folder_path, extensions)

    # check the collected files, printing every file name, with one retry
    url_checker = UrlChecker(print_all=True)
    url_checker.run(
        file_paths=paths_to_scan,
        white_listed_urls=skipped_urls,
        white_listed_patterns=skipped_patterns,
        retry_count=1,
    )
    print("Done.")
Example #2
0
    def __init__(
        self,
        path: str = None,
        file_types: List[str] = None,
        exclude_files: List[str] = None,
        print_all: bool = True,
        include_patterns: List[str] = None,
    ):
        """
        Set up a url checker and, when a root path is given, collect the
        files found under it.

        Args:
            - path              (str) : full path to the root folder to check. If empty or None, no file paths are collected.
            - file_types       (list) : types of files to scan for links; falls back to [".py", ".md"].
            - exclude_files    (list) : list of excluded files and patterns for files; falls back to an empty list.
            - print_all        (bool) : control var for whether to print all checked file names or only the ones with urls.
            - include_patterns (list) : list of files and patterns to check; falls back to an empty list.
        """
        # Run parameters; falsy list arguments fall back to their defaults
        self.path = path
        self.print_all = print_all
        self.file_types = file_types if file_types else [".py", ".md"]
        self.exclude_files = exclude_files if exclude_files else []
        self.include_patterns = include_patterns if include_patterns else []

        # One (initially empty) set per outcome
        self.results = {
            outcome: set() for outcome in ("passed", "failed", "excluded")
        }  # type: Dict[str, set]

        # Results organized by filename
        self.checks = {}  # type: Dict[str, Dict]

        # File paths stay empty unless a root path was given
        self.file_paths = []
        if not path:
            return

        # A root path that does not exist on disk terminates the program
        if not os.path.exists(path):
            sys.exit("%s does not exist." % path)

        self.file_paths = fileproc.get_file_paths(
            base_path=path,
            file_types=self.file_types,
            exclude_files=self.exclude_files,
            include_patterns=self.include_patterns,
        )
Example #3
0
    def __init__(
        self,
        path=None,
        file_types=None,
        white_listed_files=None,
        print_all=True,
        include_patterns=None,
    ):
        """
        Set up a url checker and, when a root path is given, collect the
        files found under it.

        Args:
            - path                   (str) : full path to the root folder to check. If empty or None, no file paths are collected.
            - file_types            (list) : types of files to scan; falls back to [".py", ".md"].
            - white_listed_files    (list) : list of white-listed files and patterns for files; falls back to an empty list.
            - print_all             (bool) : control var for whether to print all checked file names or only the ones with urls.
            - include_patterns      (list) : list of files and patterns to check; falls back to an empty list.
        """
        # Run parameters; falsy list arguments fall back to their defaults
        self.path = path
        self.print_all = print_all
        self.file_types = file_types if file_types else [".py", ".md"]
        self.white_listed_files = white_listed_files if white_listed_files else []
        self.include_patterns = include_patterns if include_patterns else []

        # One (initially empty) set per outcome, plus the per-file checks lookup
        self.results = {
            outcome: set() for outcome in ("passed", "failed", "white_listed")
        }
        self.checks = {}

        # File paths stay empty unless a root path was given
        self.file_paths = []
        if not path:
            return

        # A root path that does not exist on disk terminates the program
        if not os.path.exists(path):
            sys.exit("%s does not exist." % path)

        self.file_paths = fileproc.get_file_paths(
            include_patterns=self.include_patterns,
            base_path=path,
            file_types=self.file_types,
            white_listed_files=self.white_listed_files,
        )
Example #4
0
def test_get_file_paths(base_path, file_types):
    """
    Verify that get_file_paths finds exactly the two sample test files.

    The two paths may come back in either order, so both orderings are
    accepted.

    Args:
        base_path   (str) : base path passed on to get_file_paths.
        file_types (list) : list of file extensions passed on to get_file_paths.
    """
    markdown_sample = "tests/test_files/sample_test_file.md"
    python_sample = "tests/test_files/sample_test_file.py"

    found_paths = get_file_paths(base_path, file_types)

    # either ordering of the two sample files is a valid result
    accepted_orderings = [
        [markdown_sample, python_sample],
        [python_sample, markdown_sample],
    ]
    assert found_paths in accepted_orderings
Example #5
0
def test_check_run_save(tmp_path, retry_count):
    """
    Check the urls of the cloned test repository and verify the saved results.

    The results are saved twice to the same csv file: first with the default
    settings, then with relative_paths=False. The first pass validates every
    row; the second pass checks that file names no longer start with the
    leading path component seen in the first pass.

    Args:
        tmp_path    : directory the results file is written into (converted with str()).
        retry_count : forwarded to UrlChecker.run as retry_count.
    """
    # init vars
    git_path = "https://github.com/urlstechie/urlchecker-test-repo"
    file_types = [".py", ".md"]
    print_all = True
    white_listed_urls = [
        "https://superkogito.github.io/figures/fig2.html",
        "https://superkogito.github.io/figures/fig4.html",
    ]
    white_listed_patterns = ["https://superkogito.github.io/tables"]
    timeout = 1

    # clone repo
    base_path = clone_repo(git_path)

    # get all file paths in subfolder specified
    base_path = os.path.join(base_path, "test_files")
    file_paths = get_file_paths(base_path, file_types)

    # check repo urls (the return value is not needed, results are saved below)
    checker = UrlChecker(print_all=print_all)
    checker.run(
        file_paths=file_paths,
        white_listed_urls=white_listed_urls,
        white_listed_patterns=white_listed_patterns,
        retry_count=retry_count,
        timeout=timeout,
    )

    # Test saving to file
    output_file = os.path.join(str(tmp_path), "results.csv")
    assert not os.path.exists(output_file)
    saved_file = checker.save_results(output_file)
    assert os.path.exists(output_file)

    # Read in output file
    with open(saved_file, "r") as filey:
        lines = filey.readlines()

    # Header line has three items
    assert lines[0] == "URL,RESULT,FILENAME\n"

    # Ensure content looks okay; remember the leading path component of the
    # last row. Bound up front so the second pass never hits an unbound name.
    root = None
    for line in lines[1:]:
        # drop the line terminator so it does not leak into filename/root
        url, result, filename = line.rstrip("\n").split(",")

        root = filename.split("/")[0]
        assert url.startswith("http")
        assert result in ["passed", "failed"]
        # escaped dot: only a real ".py" / ".md" extension is accepted
        assert re.search(r"\.(py|md)$", filename)

    # Save with full path
    saved_file = checker.save_results(output_file, relative_paths=False)

    # Read in output file
    with open(saved_file, "r") as filey:
        lines = filey.readlines()

    # Ensure content looks okay
    for line in lines[1:]:
        _url, _result, filename = line.split(",")
        assert root is not None
        assert not filename.startswith(root)