import os

import utils  # project-local helpers: HTML_DIR, generate_urls(), write()


def generate_snapshots(base_url, overwrite=False):
    urls = utils.generate_urls()
    for url in urls:
        # Strip a leading slash so the path joins below work cleanly.
        if url and url[0] == '/':
            url = url[1:]
        # URLs ending in a trailing slash become the index page of a
        # subdirectory; the empty URL is the site root's index page.
        if not url:
            file_path = 'index'
            file_url = ''
        elif url[-1] == '/':
            file_path = url + 'index'
            file_url = url
        else:
            file_path = url
            file_url = url

        file_path = os.path.join(utils.HTML_DIR, file_path)
        if os.path.isdir(file_path):
            print('Cannot have a file_path that is a directory: %s' % file_path)
            continue  # writing would fail, so skip this URL
        if not overwrite and os.path.isfile(file_path):
            continue

        full_url = os.path.join(base_url, file_url)
        rendered_html = crawl_page(full_url)  # crawl_page() is a project-local renderer

        print('Writing: %s' % url)
        utils.write(file_path, rendered_html)
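A minimal sketch of how generate_snapshots might be driven from the command line; the --overwrite flag is an assumption, mirroring the argv handling in the next example:

import sys

if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit('Usage: %s <base-url> [--overwrite]' % sys.argv[0])
    generate_snapshots(sys.argv[1], overwrite='--overwrite' in sys.argv[2:])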
Example #2
import os
import sys

import utils  # project-local helpers: HTML_DIR, generate_urls(), write()


def generate_snapshots():
    if len(sys.argv) < 2:
        sys.exit('Usage: %s <server-root>' % sys.argv[0])

    SERVER_ROOT = sys.argv[1]

    urls = utils.generate_urls()
    for url in urls:
        # Strip a leading slash so the path joins below work cleanly.
        if url and url[0] == '/':
            url = url[1:]
        # URLs ending in a trailing slash become the index page of a
        # subdirectory; the empty URL is the site root's index page.
        if not url:
            file_path = 'index'
            file_url = ''
        elif url[-1] == '/':
            file_path = url + 'index'
            file_url = url
        else:
            file_path = url
            file_url = url

        file_path = os.path.join(utils.HTML_DIR, file_path)
        if os.path.isdir(file_path):
            print('Cannot have a file_path that is a directory: %s' % file_path)
            continue  # writing would fail, so skip this URL
        if os.path.isfile(file_path):
            continue

        full_url = os.path.join(SERVER_ROOT, file_url)
        rendered_html = crawl_page(full_url)  # crawl_page() is a project-local renderer

        print('Writing: %s' % url)
        utils.write(file_path, rendered_html)
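Both variants lean on a project-local utils module and a crawl_page() renderer that the excerpts do not show. A minimal stub, with every name and return value assumed purely for illustration, could look like:

import os

HTML_DIR = 'snapshots'  # hypothetical output directory

def generate_urls():
    # Hypothetical: return the site-relative URLs to snapshot.
    return ['/', '/about/', '/blog/first-post']

def write(file_path, content):
    # Create parent directories as needed, then write the rendered HTML.
    os.makedirs(os.path.dirname(file_path) or '.', exist_ok=True)
    with open(file_path, 'w') as f:
        f.write(content)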
Example #3
import os
import sys

import utils  # project-local helper: generate_urls()


def generate_sitemap():
    if len(sys.argv) < 2:
        sys.exit('Usage: %s <server-root>' % sys.argv[0])

    SERVER_ROOT = sys.argv[1]
    relative_urls = utils.generate_urls()
    # lstrip('/') keeps os.path.join from discarding SERVER_ROOT when a
    # URL starts with a slash, matching the handling in the snapshot code.
    full_urls = [os.path.join(SERVER_ROOT, url.lstrip('/')) for url in relative_urls]
    print('\n'.join(full_urls))
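For illustration only, the same join rule can feed a file instead of stdout; the write_sitemap helper and sitemap.txt destination below are assumptions, not part of the project:

def write_sitemap(server_root, path='sitemap.txt'):
    # Hypothetical variant of generate_sitemap() that writes the URL list
    # to a file rather than printing it.
    urls = [os.path.join(server_root, u.lstrip('/')) for u in utils.generate_urls()]
    with open(path, 'w') as f:
        f.write('\n'.join(urls) + '\n')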