def fetch_dom(link_dir, link, timeout=TIMEOUT):
    """Dump the DOM of ``link['url']`` into ``output.html`` using chrome --dump-dom.

    Chrome's stdout is redirected straight into ``<link_dir>/output.html``;
    stderr is captured so it can be attached to the error on failure.

    Args:
        link_dir: directory the output file is written to; also used as the
            working directory of the Chrome process.
        link: mapping that provides the page address under ``'url'``.
        timeout: passed to ``chrome_args``, the progress timer and ``run``.

    Returns:
        A dict with ``cmd``, ``pwd``, ``output`` (the file name on success,
        the caught exception on failure), ``status`` (``'succeeded'`` or
        ``'failed'``) followed by the entries of the timer's ``stats``.
    """
    filename = 'output.html'
    destination = os.path.join(link_dir, filename)
    command = [*chrome_args(TIMEOUT=timeout), '--dump-dom', link['url']]

    failure = None
    progress = TimedProgress(timeout, prefix=' ')
    try:
        with open(destination, 'w+') as dom_file:
            proc = run(
                command,
                stdout=dom_file,
                stderr=PIPE,
                cwd=link_dir,
                timeout=timeout,
            )

        if not proc.returncode:
            chmod_file(filename, cwd=link_dir)
        else:
            # Non-zero exit: surface Chrome's stderr as the error hints.
            raise ArchiveError('Failed to fetch DOM', proc.stderr.decode())
    except Exception as err:
        # Any failure is reported through the result dict, never re-raised.
        failure = err
    finally:
        progress.end()

    succeeded = failure is None
    return {
        'cmd': command,
        'pwd': link_dir,
        'output': filename if succeeded else failure,
        'status': 'succeeded' if succeeded else 'failed',
        **progress.stats,
    }
def fetch_screenshot(link_dir, link, timeout=TIMEOUT):
    """Take a screenshot of ``link['url']`` using chrome --screenshot.

    Chrome is run with ``link_dir`` as its working directory, and on success
    ``chmod_file`` is applied to ``screenshot.png`` relative to that
    directory (presumably where Chrome writes the image -- confirm against
    the flags produced by ``chrome_args``).

    Args:
        link_dir: working directory for the Chrome process.
        link: mapping that provides the page address under ``'url'``.
        timeout: passed to ``chrome_args``, the progress timer and ``run``.

    Returns:
        A dict with ``cmd``, ``pwd``, ``output`` (the file name on success,
        the caught exception on failure), ``status`` (``'succeeded'`` or
        ``'failed'``) followed by the entries of the timer's ``stats``.
    """
    filename = 'screenshot.png'
    command = [*chrome_args(TIMEOUT=timeout), '--screenshot', link['url']]

    failure = None
    progress = TimedProgress(timeout, prefix=' ')
    try:
        proc = run(
            command,
            stdout=PIPE,
            stderr=PIPE,
            cwd=link_dir,
            timeout=timeout,
        )

        if not proc.returncode:
            chmod_file(filename, cwd=link_dir)
        else:
            # Prefer stderr for the hints; fall back to stdout when it is empty.
            diagnostics = proc.stderr or proc.stdout
            raise ArchiveError('Failed to take screenshot', diagnostics.decode())
    except Exception as err:
        # Any failure is reported through the result dict, never re-raised.
        failure = err
    finally:
        progress.end()

    succeeded = failure is None
    return {
        'cmd': command,
        'pwd': link_dir,
        'output': filename if succeeded else failure,
        'status': 'succeeded' if succeeded else 'failed',
        **progress.stats,
    }