def test_burn():
    """Smoke-test CPU-burn accounting: counts, accumulated time, slow list.

    Exercises stats.record_burn twice (one tiny burn, one ~0.2s burn) and
    then stats.update_cpu_burn with a pre-aggregated batch, checking the
    'foo' burner's bookkeeping after each step.
    """
    with stats.record_burn('foo', url='http://example.com/'):
        # Spin briefly so a small but nonzero amount of CPU time is burned.
        start = time.process_time()
        while time.process_time() < start + 0.001:
            pass
    burner = stats.burners['foo']
    assert burner['count'] == 1
    assert 0 < burner['time'] < 0.3
    # first burn never goes on the list
    assert 'list' not in burner

    with stats.record_burn('foo', url='http://example.com/'):
        # Burn ~0.2s of CPU time so this burn is big enough to be listed.
        start = time.process_time()
        while time.process_time() < start + 0.2:
            pass
    burner = stats.burners['foo']
    assert burner['count'] == 2
    assert 0 < burner['time'] < 0.3
    assert len(burner['list']) == 1

    # Merge in a pre-aggregated batch: 3 burns totalling 3.0 seconds.
    stats.update_cpu_burn('foo', 3, 3.0, set())
    burner = stats.burners['foo']
    assert burner['count'] == 5
    assert 3.0 < burner['time'] < 3.3
    assert len(burner['list']) == 1

    stats.report()
def test_burn():
    """Verify burner bookkeeping across two recorded burns and a bulk update."""

    def spin(duration):
        # Busy-wait so CPU time (not wall-clock time) advances by `duration`.
        deadline = time.process_time() + duration
        while time.process_time() < deadline:
            pass

    with stats.record_burn('foo', url='http://example.com/'):
        spin(0.001)
    assert stats.burners['foo']['count'] == 1
    assert stats.burners['foo']['time'] > 0
    assert stats.burners['foo']['time'] < 0.3
    assert 'list' not in stats.burners['foo']  # first burn never goes on the list

    with stats.record_burn('foo', url='http://example.com/'):
        spin(0.2)
    assert stats.burners['foo']['count'] == 2
    assert stats.burners['foo']['time'] > 0
    assert stats.burners['foo']['time'] < 0.3
    assert len(stats.burners['foo']['list']) == 1

    # Fold in an externally-aggregated batch of 3 burns / 3.0 CPU-seconds.
    stats.update_cpu_burn('foo', 3, 3.0, set())
    assert stats.burners['foo']['count'] == 5
    assert stats.burners['foo']['time'] > 3.0
    assert stats.burners['foo']['time'] < 3.3
    assert len(stats.burners['foo']['list']) == 1

    stats.report()
def main():
    """Queue every .html/.htm file named on the command line, then crawl.

    Each argv entry is either a file (queued directly) or a directory
    (walked recursively for .html/.htm files).  Runs crawl() on the event
    loop until the queue drains, shutting the loop down cleanly even on
    Ctrl-C.
    """
    for arg in sys.argv[1:]:
        if os.path.isfile(arg):
            queue.put_nowait(arg)
            continue
        # Not a plain file: treat it as a directory tree to walk.
        for root, _, files in os.walk(arg):
            for fname in files:
                if fname.endswith(('.html', '.htm')):
                    queue.put_nowait(os.path.join(root, fname))

    print('Queue size is {}, beginning work.'.format(queue.qsize()))
    try:
        loop.run_until_complete(crawl())
        print('exit run until complete')
    except KeyboardInterrupt:
        sys.stderr.flush()
        print('\nInterrupt. Exiting cleanly.\n')
    finally:
        # Stop, drain, and close the loop so cancelled tasks finish.
        loop.stop()
        loop.run_forever()
        loop.close()


# NOTE(review): reconstructed from collapsed source — these statements appear
# at module level after main() in the original text.
levels = [logging.ERROR, logging.WARN, logging.INFO, logging.DEBUG]
logging.basicConfig(level=levels[3])
stats.report()
def test_latency():
    """Check latency recording: counts, totals, histogram, and slow list."""
    with stats.record_latency('foo', url='http://example.com/'):
        # Spin for ~1ms of wall-clock time so a nonzero latency is recorded.
        deadline = time.time() + 0.001
        while time.time() < deadline:
            pass
    entry = stats.latencies['foo']
    assert entry['count'] == 1
    assert 0 < entry['time'] < 0.3
    # first latency never goes on the list
    assert 'list' not in entry
    assert 'hist' in entry

    # With elapsedmin=0.1, a 0.3s sleep is slow enough to be listed.
    with stats.record_latency('foo', url='http://example.com/', elapsedmin=0.1):
        time.sleep(0.3)
    entry = stats.latencies['foo']
    assert entry['count'] == 2
    assert 0 < entry['time'] < 20.0
    assert 'list' in entry

    stats.report()
# NOTE(review): this span is the tail of an async coroutine (it uses `await`)
# whose `async def` header is not visible in this chunk; the statements are
# reproduced unchanged — confirm the enclosing definition before editing.

# Spawn a fixed-size pool of worker tasks on the shared event loop.
workers = [asyncio.Task(work(), loop=loop) for _ in range(test_threadcount)]
print('q count is {}'.format(queue.qsize()))
# Wait until every queued item has been marked done by a worker.
await queue.join()
print('join is done')
# Cancel any worker tasks that have not already finished.
for w in workers:
    if not w.done():
        w.cancel()

# Main program:
# Seed the queue with dummy work items for the stress run.
for i in range(10000):
    queue.put_nowait('foo')
print('Queue size is {}, beginning work.'.format(queue.qsize()))
try:
    loop.run_until_complete(crawl())
    print('exit run until complete')
except KeyboardInterrupt:
    sys.stderr.flush()
    print('\nInterrupt. Exiting cleanly.\n')
finally:
    # Stop, drain, and close the loop so pending cancellations complete.
    loop.stop()
    loop.run_forever()
    loop.close()

# Index 3 selects the most verbose level (DEBUG).
levels = [logging.ERROR, logging.WARN, logging.INFO, logging.DEBUG]
logging.basicConfig(level=levels[3])
stats.report()
parse.report()
# NOTE(review): the first stanza references loop-carried names (v, names, i,
# biggest_links, biggest_index) whose enclosing loop/function is not visible
# in this chunk; statements are reproduced unchanged.

# Report the symmetric difference between one parser's link set (v) and the
# largest link set seen (biggest_links).
extra1 = v.difference(biggest_links)
extra2 = biggest_links.difference(v)
if extra1:
    print(' extra in {}: {!r}'.format(names[i], extra1))
else:
    print(' count was {} for {}'.format(len(v), names[i]))
if extra2:
    print(' extra in {}: {!r}'.format(names[biggest_index], extra2))
else:
    print(' count was {} for {}'.format(len(biggest_links), names[biggest_index]))

LOGGER = logging.getLogger(__name__)

# Index 3 selects the most verbose level (DEBUG).
levels = [logging.ERROR, logging.WARN, logging.INFO, logging.DEBUG]
logging.basicConfig(level=levels[3])

# Parse every file named on the command line; a directory argument is walked
# recursively and each .html/.htm file found is parsed.
for d in sys.argv[1:]:
    if os.path.isfile(d):
        with open(d, 'r', errors='ignore') as fi:
            parse_all(d, fi.read())
        continue
    for root, _, files in os.walk(d):
        for f in files:
            if f.endswith('.html') or f.endswith('.htm'):
                expanded = os.path.join(root, f)
                with open(expanded, 'r', errors='ignore') as fi:
                    parse_all(expanded, fi.read())
stats.report()