def test_clone_and_del_repo(git_path):
    """
    Test the clone_repo and delete_repo functions.

    Args:
      - git_path : pytest fixture providing a git repository url

    Clones the repository, verifies the checkout exists on disk and is
    named after the repository, then deletes it and verifies delete_repo
    reports success (return code 0).
    """
    # delete a leftover checkout from a previous run, if it exists,
    # so the clone below starts from a clean state
    if os.path.exists(os.path.basename(git_path)):
        delete_repo(os.path.basename(git_path))

    # clone
    repo_path = clone_repo(git_path)
    assert os.path.exists(repo_path)
    assert os.path.basename(repo_path).startswith(os.path.basename(git_path))

    # delete should have return code of 0 (success)
    assert delete_repo(repo_path) == 0
# Esempio n. 2
# 0
def test_check_run_save(tmp_path, retry_count):
    """
    Run a UrlChecker over a cloned test repository and verify that
    results can be saved to csv, both with relative and full paths.

    Args:
      - tmp_path    : pytest fixture providing a temporary directory
      - retry_count : pytest fixture/parameter with number of retries
    """
    # init vars
    git_path = "https://github.com/urlstechie/urlchecker-test-repo"
    file_types = [".py", ".md"]
    print_all = True
    white_listed_urls = [
        "https://superkogito.github.io/figures/fig2.html",
        "https://superkogito.github.io/figures/fig4.html",
    ]
    white_listed_patterns = ["https://superkogito.github.io/tables"]
    timeout = 1
    force_pass = False

    # clone repo
    base_path = clone_repo(git_path)

    # get all file paths in subfolder specified
    base_path = os.path.join(base_path, "test_files")
    file_paths = get_file_paths(base_path, file_types)

    # check repo urls
    checker = UrlChecker(print_all=print_all)
    check_results = checker.run(
        file_paths=file_paths,
        white_listed_urls=white_listed_urls,
        white_listed_patterns=white_listed_patterns,
        retry_count=retry_count,
        timeout=timeout,
    )

    # Test saving to file
    output_file = os.path.join(str(tmp_path), "results.csv")
    assert not os.path.exists(output_file)
    saved_file = checker.save_results(output_file)
    assert os.path.exists(output_file)

    # Read in output file
    with open(saved_file, "r") as filey:
        lines = filey.readlines()

    # Header line has three items
    assert lines[0] == "URL,RESULT,FILENAME\n"

    # Ensure content looks okay; remember the root component of the
    # (relative) file paths so we can contrast with full paths below
    root = None
    for line in lines[1:]:
        url, result, filename = line.split(",")
        root = filename.split("/")[0]
        assert url.startswith("http")
        assert result in ["passed", "failed"]
        # dots escaped: ".py" would otherwise match any char before "py"
        assert re.search(r"\.(py|md)$", filename)

    # Save with full path
    saved_file = checker.save_results(output_file, relative_paths=False)

    # Read in output file
    with open(saved_file, "r") as filey:
        lines = filey.readlines()

    # Full paths must not start with the relative root seen above
    for line in lines[1:]:
        url, result, filename = line.split(",")
        assert not filename.startswith(root)
# Esempio n. 3
# 0
def main(args, extra):
    """
    Main entrypoint for running a check. We expect an args object with
    arguments from the main client. From here we determine the path
    to parse (or GitHub url to clone) and call the main check function
    under main/check.py

    Args:
      - args  : the argparse ArgParser with parsed args
      - extra : extra arguments not handled by the parser

    Exits with status 1 when urls failed and force_pass is not set,
    otherwise exits with status 0.
    """
    path = args.path

    # Case 1: specify present working directory
    if not path or path == ".":
        path = os.getcwd()
        logging.debug("Path specified as present working directory, %s" % path)

    # Case 2: We need to clone (a git@ or http(s) url was provided)
    elif re.search("^(git@|http)", path):
        logging.debug("Repository url %s detected, attempting clone" % path)
        path = clone_repo(path, branch=args.branch)

    # Add subfolder to path
    if args.subfolder:
        path = os.path.join(path, args.subfolder)

    # By the time we get here, a path must exist
    if not os.path.exists(path):
        sys.exit("Error %s does not exist." % path)

    # Parse file types, and excluded urls and files (includes absolute and patterns)
    file_types = args.file_types.split(",")
    exclude_urls = remove_empty(args.exclude_urls.split(","))
    exclude_patterns = remove_empty(args.exclude_patterns.split(","))
    exclude_files = remove_empty(args.exclude_files.split(","))
    files = remove_empty(args.files.split(","))

    # Alert user about settings
    print("           original path: %s" % args.path)
    print("              final path: %s" % path)
    print("               subfolder: %s" % args.subfolder)
    print("                  branch: %s" % args.branch)
    print("                 cleanup: %s" % args.cleanup)
    print("              file types: %s" % file_types)
    print("                   files: %s" % files)
    print("               print all: %s" % (not args.no_print))
    print("                 verbose: %s" % (args.verbose))
    print("           urls excluded: %s" % exclude_urls)
    print("   url patterns excluded: %s" % exclude_patterns)
    print("  file patterns excluded: %s" % exclude_files)
    print("              force pass: %s" % args.force_pass)
    print("             retry count: %s" % args.retry_count)
    print("                    save: %s" % args.save)
    print("                 timeout: %s" % args.timeout)

    # Instantiate a new checker with provided arguments
    checker = UrlChecker(
        path=path,
        file_types=file_types,
        include_patterns=files,
        exclude_files=exclude_files,
        print_all=not args.no_print,
    )
    check_results = checker.run(
        exclude_urls=exclude_urls,
        exclude_patterns=exclude_patterns,
        retry_count=args.retry_count,
        timeout=args.timeout,
    )

    # save results to file, if save indicated
    if args.save:
        checker.save_results(args.save)

    # delete repo when done, if requested
    if args.cleanup:
        logger.info("Cleaning up %s..." % path)
        delete_repo(path)

    # Case 1: We didn't find any urls to check
    if not check_results["failed"] and not check_results["passed"]:
        print("\n\n\U0001F937. No urls were collected.")
        sys.exit(0)

    # Case 2: We had errors, print them for the user
    # NOTE(review): the original lines here were corrupted by scraping
    # ("******" censorship); reconstructed to print failures per file in
    # verbose mode, or as a flat list otherwise — confirm against upstream.
    if check_results["failed"]:
        if args.verbose:
            print("\n\U0001F914 Uh oh... The following urls did not pass:")
            for file_name, result in checker.checks.items():
                if result["failed"]:
                    print_failure(file_name + ":")
                    for url in result["failed"]:
                        print_failure("     " + url)
        else:
            print("\n\U0001F914 Uh oh... The following urls did not pass:")
            for failed_url in check_results["failed"]:
                print_failure(failed_url)

    # If we have failures and it's not a force pass, exit with 1
    if not args.force_pass and check_results["failed"]:
        sys.exit(1)

    # Finally, alert user if we are passing conditionally
    if check_results["failed"]:
        print("\n\U0001F928 Conditional pass force pass True.")
    else:
        print("\n\n\U0001F389 All URLS passed!")
    sys.exit(0)