Example #1
def test_burn():
    with stats.record_burn('foo', url='http://example.com/'):
        t0 = time.process_time()
        while time.process_time() < t0 + 0.001:
            pass  # busy-wait to burn roughly 1 ms of CPU time

    assert stats.burners['foo']['count'] == 1
    assert stats.burners['foo']['time'] > 0 and stats.burners['foo']['time'] < 0.3
    assert 'list' not in stats.burners['foo']  # first burn never goes on the list

    with stats.record_burn('foo', url='http://example.com/'):
        t0 = time.process_time()
        while time.process_time() < t0 + 0.2:
            pass

    assert stats.burners['foo']['count'] == 2
    assert stats.burners['foo']['time'] > 0 and stats.burners['foo']['time'] < 0.3
    assert len(stats.burners['foo']['list']) == 1

    stats.update_cpu_burn('foo', 3, 3.0, set())
    assert stats.burners['foo']['count'] == 5
    assert stats.burners['foo']['time'] > 3.0 and stats.burners['foo']['time'] < 3.3
    assert len(stats.burners['foo']['list']) == 1

    stats.report()
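
A minimal sketch of the stats interface this test exercises is below. It is a hypothetical reconstruction, not the real module: record_burn measures CPU time with time.process_time() and aggregates per-name totals, the first burn for a name is treated as warm-up and never lands on the 'list', and update_cpu_burn merges pre-aggregated totals without touching the list. The 0.1-second list threshold is an assumption consistent with the assertions above.

import time
from contextlib import contextmanager

burners = {}

@contextmanager
def record_burn(name, url=None):
    t0 = time.process_time()
    try:
        yield
    finally:
        elapsed = time.process_time() - t0
        entry = burners.setdefault(name, {'count': 0, 'time': 0.0})
        first = entry['count'] == 0
        entry['count'] += 1
        entry['time'] += elapsed
        # the first burn for a name is warm-up and never goes on the list;
        # the 0.1 s threshold is a guess consistent with the test
        if not first and elapsed > 0.1:
            entry.setdefault('list', []).append((elapsed, url))

def update_cpu_burn(name, count, elapsed, url_set):
    # merge totals aggregated elsewhere; deliberately leaves 'list' alone
    entry = burners.setdefault(name, {'count': 0, 'time': 0.0})
    entry['count'] += count
    entry['time'] += elapsed

def report():
    for name, entry in sorted(burners.items()):
        print('{}: count={}, time={:.3f}'.format(name, entry['count'], entry['time']))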
Example #2
def main():
    for d in sys.argv[1:]:
        if os.path.isfile(d):
            queue.put_nowait(d)
            continue
        for root, _, files in os.walk(d):
            for f in files:
                if f.endswith('.html') or f.endswith('.htm'):
                    queue.put_nowait(os.path.join(root, f))

    print('Queue size is {}, beginning work.'.format(queue.qsize()))

    try:
        loop.run_until_complete(crawl())
        print('exit run until complete')
    except KeyboardInterrupt:
        sys.stderr.flush()
        print('\nInterrupt. Exiting cleanly.\n')
    finally:
        loop.stop()
        loop.run_forever()
        loop.close()

    levels = [logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]
    logging.basicConfig(level=levels[3])  # levels[3] == logging.DEBUG
    stats.report()
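
main() relies on module-level queue, loop, and crawl() objects defined elsewhere. A plausible minimal setup, with the worker count and the work() body as assumptions:

import asyncio

loop = asyncio.new_event_loop()
queue = asyncio.Queue()  # filled with HTML file paths by main()

async def work():
    # consume paths until cancelled; task_done() lets queue.join() return
    while True:
        path = await queue.get()
        try:
            pass  # parse the file here
        finally:
            queue.task_done()

async def crawl():
    # drain the queue with a few workers, then cancel them (see Example #4)
    workers = [loop.create_task(work()) for _ in range(4)]
    await queue.join()
    for w in workers:
        if not w.done():
            w.cancel()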
Example #3
def test_latency():
    with stats.record_latency('foo', url='http://example.com/'):
        t0 = time.time()
        while time.time() < t0 + 0.001:
            pass  # busy-wait for roughly 1 ms of wall-clock time

    assert stats.latencies['foo']['count'] == 1
    assert stats.latencies['foo']['time'] > 0 and stats.latencies['foo']['time'] < 0.3
    assert 'list' not in stats.latencies['foo']  # first latency never goes on the list
    assert 'hist' in stats.latencies['foo']

    with stats.record_latency('foo', url='http://example.com/', elapsedmin=0.1):
        time.sleep(0.3)

    assert stats.latencies['foo']['count'] == 2
    assert stats.latencies['foo']['time'] > 0 and stats.latencies['foo']['time'] < 20.0
    assert 'list' in stats.latencies['foo']

    stats.report()
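
As with record_burn, here is a sketch of record_latency consistent with these assertions (hypothetical, including the elapsedmin default): wall-clock timing via time.time(), a 'hist' entry kept from the first observation, and a 'list' entry only for non-first latencies that exceed elapsedmin.

import time
from contextlib import contextmanager

latencies = {}

@contextmanager
def record_latency(name, url=None, elapsedmin=10.0):
    t0 = time.time()
    try:
        yield
    finally:
        elapsed = time.time() - t0
        entry = latencies.setdefault(name, {'count': 0, 'time': 0.0, 'hist': []})
        first = entry['count'] == 0
        entry['count'] += 1
        entry['time'] += elapsed
        entry['hist'].append(elapsed)  # every observation lands in the histogram
        # first latency is warm-up; later ones above elapsedmin go on the list
        if not first and elapsed > elapsedmin:
            entry.setdefault('list', []).append((elapsed, url))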
Example #4
    # loop.create_task() replaces the deprecated asyncio.Task(coro, loop=loop)
    workers = [loop.create_task(work()) for _ in range(test_threadcount)]
    print('q count is {}'.format(queue.qsize()))
    await queue.join()
    print('join is done')
    for w in workers:
        if not w.done():
            w.cancel()

# Main program:

for i in range(10000):
    queue.put_nowait('foo')

print('Queue size is {}, beginning work.'.format(queue.qsize()))

try:
    loop.run_until_complete(crawl())
    print('exit run until complete')
except KeyboardInterrupt:
    sys.stderr.flush()
    print('\nInterrupt. Exiting cleanly.\n')
finally:
    loop.stop()
    loop.run_forever()
    loop.close()

levels = [logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]
logging.basicConfig(level=levels[3])  # levels[3] == logging.DEBUG
stats.report()
parse.report()
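
The snippet assumes a work() coroutine and a test_threadcount constant defined elsewhere; a plausible worker under the standard queue consumer pattern (the body and the count are assumptions):

test_threadcount = 10  # assumed value

async def work():
    # pull items until cancelled; task_done() is what lets queue.join() finish
    while True:
        item = await queue.get()
        try:
            pass  # real per-item work goes here
        finally:
            queue.task_done()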
Example #5
            # diff this parser's link set against the largest set found
            extra1 = v.difference(biggest_links)
            extra2 = biggest_links.difference(v)
            if extra1:
                print('  extra in {}: {!r}'.format(names[i], extra1))
            else:
                print('  count was {} for {}'.format(len(v), names[i]))
            if extra2:
                print('  extra in {}: {!r}'.format(names[biggest_index], extra2))
            else:
                print('  count was {} for {}'.format(len(biggest_links), names[biggest_index]))


LOGGER = logging.getLogger(__name__)

levels = [logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]
logging.basicConfig(level=levels[3])  # levels[3] == logging.DEBUG

for d in sys.argv[1:]:
    if os.path.isfile(d):
        with open(d, 'r', errors='ignore') as fi:
            parse_all(d, fi.read())
        continue
    for root, _, files in os.walk(d):
        for f in files:
            if f.endswith('.html') or f.endswith('.htm'):
                expanded = os.path.join(root, f)
                with open(expanded, 'r', errors='ignore') as fi:
                    parse_all(expanded, fi.read())

stats.report()
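
parse_all is not shown, and the fragment at the top of this example begins mid-function. Presumably parse_all runs several link extractors over the same HTML and diffs each extractor's result set against the largest one, as the fragment does. A self-contained sketch with two toy extractors (all names here are hypothetical, not the project's real parsers):

import re
from html.parser import HTMLParser

def links_by_regex(html):
    return set(re.findall(r'href="([^"]*)"', html))

class _HrefParser(HTMLParser):
    def __init__(self):
        super().__init__()
        self.links = set()

    def handle_starttag(self, tag, attrs):
        if tag == 'a':
            for key, value in attrs:
                if key == 'href' and value:
                    self.links.add(value)

def links_by_stdlib(html):
    p = _HrefParser()
    p.feed(html)
    return p.links

def parse_all(name, html):
    names = ['regex', 'html.parser']
    link_sets = [links_by_regex(html), links_by_stdlib(html)]
    # use the extractor that found the most links as the baseline
    biggest_index = max(range(len(link_sets)), key=lambda i: len(link_sets[i]))
    biggest_links = link_sets[biggest_index]
    print('{}:'.format(name))
    for i, v in enumerate(link_sets):
        if i == biggest_index:
            continue
        extra1 = v.difference(biggest_links)
        extra2 = biggest_links.difference(v)
        if extra1:
            print('  extra in {}: {!r}'.format(names[i], extra1))
        if extra2:
            print('  extra in {}: {!r}'.format(names[biggest_index], extra2))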