def capture(url, wait_for_text='', selector='body', viewport_size='800x450',
            filename='capture.png'):
    """Open *url* in a ghost.py session and save a screenshot plus the HTML.

    see https://hub.docker.com/r/phuslu/ghost.py/

    Args:
        url: Address passed to ``session.open``.
        wait_for_text: When non-empty, block until ``session.wait_for_text``
            returns for this text; otherwise wait for the page to finish
            loading via ``session.wait_for_page_loaded``.
        selector: Selector forwarded to ``session.capture_to`` to choose
            which element is captured.
        viewport_size: ``'WIDTHxHEIGHT'`` string; split on ``'x'`` and
            converted to a tuple of ints for the session.
        filename: Output image path. A name containing no ``'/'`` is
            placed under ``/data/``.

    Side effects:
        Writes the image to ``filename`` and the page content, UTF-8
        encoded, to the same path with a ``.html`` extension. Both files
        are chmod-ed to ``0o666``.

    Raises:
        ValueError: If a component of ``viewport_size`` is not an integer.
    """
    import ghost

    logging.info('create ghost.py Session')
    dimensions = tuple(int(part) for part in viewport_size.split('x'))
    session = ghost.Session(ghost.Ghost(), viewport_size=dimensions)

    logging.info('open %r', url)
    session.open(url)

    if wait_for_text:
        logging.info('wait_for_text %r', wait_for_text)
        session.wait_for_text(wait_for_text)
    else:
        logging.info('wait_for_page_loaded')
        session.wait_for_page_loaded()

    # Bare file names (no directory component) default to the /data/ folder.
    if '/' not in filename:
        filename = '/data/' + filename

    logging.info('capture selector=%r to %r', selector, filename)
    session.capture_to(filename, selector=selector)
    os.chmod(filename, 0o666)

    htmlfile = os.path.splitext(filename)[0] + '.html'
    # Close the handle explicitly so the content is flushed to disk before
    # chmod runs, instead of relying on garbage collection to close it.
    with open(htmlfile, 'wb') as html_out:
        html_out.write(session.content.encode('utf-8'))
    os.chmod(htmlfile, 0o666)
def test_ghost():
    """Manually exercise ghost.py by loading https://jd.com and dumping it.

    Creates a ghost.py session with a desktop Chrome user agent, opens the
    page, waits up to 30 seconds for it to load, and prints the page
    content. ``time.localtime()`` is printed before opening and after the
    dump so the elapsed time can be read off the output.
    """
    # Imported locally so the function does not depend on a module-level
    # ``import ghost`` being present.
    import ghost

    # The same user agent is used for the session and for the request.
    user_agent = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36"
    )

    gh = ghost.Ghost()
    se = ghost.Session(
        gh,
        user_agent=user_agent,
        wait_timeout=30,
        display=False,
        viewport_size=(5000, 1000),
        ignore_ssl_errors=True,
        plugins_enabled=True,
        java_enabled=True,
        download_images=False)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(time.localtime())
    se.open("https://jd.com", timeout=30, user_agent=user_agent)
    se.wait_for_page_loaded(30)
    # NOTE: a disabled ``se.wait_for_alert(30)`` call (wrapped in a bare
    # try/except) previously sat here inside a string literal; it never ran.
    print(se.content)
    print(time.localtime())