def generate_snapshots(base_url, overwrite=False):
    """Crawl every site URL and write its rendered HTML to disk.

    Args:
        base_url: Root URL of the running server to crawl against.
        overwrite: When True, re-render pages whose snapshot file
            already exists; when False (default), existing files are
            skipped.
    """
    urls = utils.generate_urls()
    for url in urls:
        # Strip a leading slash so os.path.join treats the path as
        # relative to HTML_DIR (an absolute component would discard it).
        if url and url[0] == '/':
            url = url[1:]
        # For urls that end with a trailing slash, create them
        # as the index page of a subdirectory; the site root becomes
        # the top-level 'index' file.
        if not url:
            file_path = 'index'
            file_url = ''
        elif url[-1] == '/':
            file_path = url + 'index'
            file_url = url
        else:
            file_path = url
            file_url = url
        file_path = os.path.join(utils.HTML_DIR, file_path)
        if os.path.isdir(file_path):
            # A directory at this path means we cannot write the file;
            # skip it instead of falling through and letting
            # utils.write fail.
            print('Cannot have file_path that is directory: %s' % file_path)
            continue
        if not overwrite and os.path.isfile(file_path):
            continue
        full_url = os.path.join(base_url, file_url)
        rendered_html = crawl_page(full_url)
        print('Writing: %s' % url)
        utils.write(file_path, rendered_html)
def generate_snapshots():
    """Crawl every site URL and write its rendered HTML to disk.

    Reads the server root from sys.argv[1] and exits with a usage
    message when it is missing. Pages whose snapshot file already
    exists are always skipped.

    NOTE(review): this redefines the parameterized generate_snapshots
    defined earlier in this file; only this zero-argument binding
    survives at import time. Confirm which version is intended.
    """
    if len(sys.argv) < 2:
        sys.exit('Usage: %s <server-root>' % sys.argv[0])
    SERVER_ROOT = sys.argv[1]
    urls = utils.generate_urls()
    for url in urls:
        # Strip a leading slash so os.path.join treats the path as
        # relative to HTML_DIR (an absolute component would discard it).
        if url and url[0] == '/':
            url = url[1:]
        # For urls that end with a trailing slash, create them
        # as the index page of a subdirectory; the site root becomes
        # the top-level 'index' file.
        if not url:
            file_path = 'index'
            file_url = ''
        elif url[-1] == '/':
            file_path = url + 'index'
            file_url = url
        else:
            file_path = url
            file_url = url
        file_path = os.path.join(utils.HTML_DIR, file_path)
        if os.path.isdir(file_path):
            # A directory at this path means we cannot write the file;
            # skip it instead of falling through and letting
            # utils.write fail.
            print('Cannot have file_path that is directory: %s' % file_path)
            continue
        if os.path.isfile(file_path):
            continue
        full_url = os.path.join(SERVER_ROOT, file_url)
        rendered_html = crawl_page(full_url)
        print('Writing: %s' % url)
        utils.write(file_path, rendered_html)
def generate_sitemap():
    """Print one absolute URL per line for every site URL.

    Reads the server root from sys.argv[1] and exits with a usage
    message when it is missing.
    """
    if len(sys.argv) < 2:
        sys.exit('Usage: %s <server-root>' % sys.argv[0])
    SERVER_ROOT = sys.argv[1]
    relative_urls = utils.generate_urls()
    # os.path.join drops SERVER_ROOT if a url is absolute; urls here
    # are expected to be relative (see generate_urls) — TODO confirm.
    full_urls = [os.path.join(SERVER_ROOT, url) for url in relative_urls]
    # Parenthesized single-argument print is valid in Python 2 and 3.
    print('\n'.join(full_urls))