Example #1
import logging

def validate_html(directory, args):
    """Validate all the HTML files in a directory."""
    # pylint: disable=import-outside-toplevel
    from beancount.utils import scrape  # To avoid lxml dependency.

    files, missing, empty = scrape.validate_local_links_in_dir(directory)
    logging.info('%d files processed', len(files))
    for target in missing:
        logging.error('Missing %s', target)
    for target in empty:
        logging.error('Empty %s', target)
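
A minimal invocation sketch, assuming a directory of previously baked HTML output; the 'out' path and the empty args tuple are illustrative only:

import logging

logging.basicConfig(level=logging.INFO)
# 'out' stands in for whatever directory holds the baked HTML files.
validate_html('out', ())
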
Example #2
import logging

def do_validate_html(directory, args):
    """Validate all the HTML files under a directory hierarchy.

    Args:
      directory: A string, the root directory whose contents to validate.
      args: A tuple of the rest of arguments.
    """
    from beancount.utils import scrape
    files, missing, empty = scrape.validate_local_links_in_dir(directory)
    logging.info('%d files processed', len(files))
    for target in missing:
        logging.error('Missing %s', target)
    for target in empty:
        logging.error('Empty %s', target)
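
Handlers named do_* like this one are typically looked up and dispatched by command name from a main() entry point; the sketch below is a plausible reconstruction of that pattern, not the verbatim bean-doctor source:

import sys

def main():
    command, *rest = sys.argv[1:]
    # Map a command such as 'validate_html' onto its do_* handler.
    handler = globals()['do_' + command.replace('-', '_')]
    handler(rest[0], tuple(rest[1:]))
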
Example #3
# Imports assumed from the top of the original script; the excerpt omits
# them. ARCHIVERS, bake_to_directory() and archive() are module-level
# definitions that are likewise not shown here.
import logging
import shutil
from os import path

from beancount.parser import version
from beancount.utils import file_utils
from beancount.utils import scrape
from beancount.web import web

def main():
    parser = version.ArgumentParser(description=__doc__)

    web_group = web.add_web_arguments(parser)
    web_group.set_defaults(port=9475)

    group = parser.add_argument_group("Bake process arguments")

    group.add_argument('output',
                       help=('The output directory or archive name. If you '
                             'specify a filename with a well-known extension, '
                             'we automatically archive the fetched directory '
                             'contents to this archive name and delete them.'))

    # Some of the more numerous pages (e.g. monthly reports) are skipped by
    # default so that baking completes in a reasonable amount of time; this
    # switch renders them all.
    group.add_argument('--render-all-pages',
                       '--full',
                       action='store_true',
                       help=("Don't ignore some of the more numerous pages, "
                             "like monthly reports."))

    opts = parser.parse_args()

    # Figure out the archival method.
    output_directory, extension = file_utils.path_greedy_split(opts.output)
    if extension:
        try:
            archival_command = ARCHIVERS[extension]
        except KeyError as exc:
            raise SystemExit(
                "ERROR: Unknown archiver type '{}'".format(extension)) from exc
    else:
        archival_command = None

    # Check pre-conditions on input/output filenames.
    if not path.exists(opts.filename):
        raise SystemExit("ERROR: Missing input file '{}'".format(
            opts.filename))
    if path.exists(opts.output):
        raise SystemExit("ERROR: Output path already exists '{}'".format(
            opts.output))
    if path.exists(output_directory):
        raise SystemExit("ERROR: Output directory already exists '{}'".format(
            output_directory))

    # Bake to a directory hierarchy of files with local links.
    bake_to_directory(opts, output_directory, opts.render_all_pages)

    # Verify the bake output files. This is just a sanity checking step.
    # You can also use "bean-doctor validate_html <file>" to run this manually.
    logging.info('Validating HTML output files & links.')
    files, missing, empty = scrape.validate_local_links_in_dir(
        output_directory)
    logging.info('Validation: %d files processed', len(files))
    for target in missing:
        logging.error("Validation error: Missing '%s'", target)
    for target in empty:
        logging.error("Validation error: Empty '%s'", target)

    # Archive if requested.
    if archival_command is not None:
        # Normalize the paths and ensure sanity before we start compression.
        output_directory = path.abspath(output_directory)
        archive_filename = path.abspath(opts.output)
        if not path.exists(output_directory):
            raise IOError("Directory to archive '{}' does not exist".format(
                output_directory))
        if path.exists(archive_filename):
            raise IOError("Output archive name '{}' already exists".format(
                archive_filename))

        # Dispatch to a particular compressor.
        if isinstance(archival_command, str):
            archive(archival_command, output_directory, archive_filename, True)
        elif callable(archival_command):
            archival_command(output_directory, archive_filename)

        # Delete the output directory.
        shutil.rmtree(output_directory)

    print("Output in '{}'".format(opts.output))
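
The function leans on three module-level names the excerpt does not show: bake_to_directory(), archive() and ARCHIVERS. From the way main() dispatches on the table's values (a string is handed to archive() as a shell command, a callable is invoked with the directory and archive name), ARCHIVERS plausibly looks like the sketch below; the extensions and command templates are assumptions, not the verbatim source:

import shutil

def archive_zip(directory, archive_filename):
    # Illustrative callable archiver: zip up the directory contents.
    shutil.make_archive(archive_filename[:-len('.zip')], 'zip', directory)

# Hypothetical dispatch table: archive extension -> shell command template
# (run by archive()) or callable(output_directory, archive_filename).
ARCHIVERS = {
    '.tar.gz': 'tar -C {dirname} -zcvf {archive} .',
    '.tgz': 'tar -C {dirname} -zcvf {archive} .',
    '.zip': archive_zip,
}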