예제 #1
0
def testExcluded(testdatadir, domaininfo, mapper, scheduler):
    """Check that a queued URI is reported as excluded and stored as such.

    With ``domaininfo.excluded`` set to 1, one URI is written to the
    'wide' incoming queue and ``processinq`` is run. The returned counters
    must show one processed and one excluded item, with nothing scheduled
    or saved. After shutdown, the dispatcher's excluded-list queue
    directory must contain exactly that URI.

    All four arguments are supplied by the test harness (fixtures defined
    outside this snippet).
    """
    dispatcher = LevelDispatcher(domaininfo, 'wide', mapper, scheduler)
    enq = FileEnqueue(testdatadir.inqdir('wide'))

    curi = dict(u='http://test.example.com/3')
    # Set the exclusion flag on the fixture before the URI is processed.
    domaininfo.excluded = 1

    enq.queue([curi])
    enq.close()

    r = dispatcher.processinq(10)

    assert r['processed'] == 1, r
    assert r['scheduled'] == 0, r
    assert r['excluded'] == 1, r
    assert r['saved'] == 0, r

    # Shut down before reading the excluded-list files back
    # (presumably this flushes pending queue writes -- confirm).
    dispatcher.shutdown()

    # Debug aid: dump the raw content of every gzipped excluded-queue file.
    # The call form of print works on both Python 2 and Python 3.
    for q in py.path.local(dispatcher.excludedlist.qdir).listdir(
            fil=lambda p: p.ext == '.gz'):
        with gzip.open(str(q)) as f:
            print(f.read())

    items = readqueue(dispatcher.excludedlist.qdir)
    assert len(items) == 1, items
    assert isinstance(items[0], dict), items[0]
    assert items[0]['u'] == curi['u']
예제 #2
0
def testRegular(testdatadir, domaininfo, mapper, scheduler):
    """Check that an ordinary queued URI ends up scheduled.

    One URI is written to the 'wide' incoming queue and ``processinq`` is
    run. The returned counters must show one processed and one scheduled
    item, with nothing excluded or saved, and the scheduler must hold
    exactly that URI.
    """
    dispatcher = LevelDispatcher(domaininfo, 'wide', mapper, scheduler)
    incoming = FileEnqueue(testdatadir.inqdir('wide'))

    target = {'u': 'http://test.example.com/1'}
    incoming.queue([target])
    incoming.close()

    result = dispatcher.processinq(10)

    # Checked in this fixed order so the first failing counter is reported.
    expected_counts = (
        ('processed', 1),
        ('scheduled', 1),
        ('excluded', 0),
        ('saved', 0),
    )
    for counter, count in expected_counts:
        assert result[counter] == count, result

    assert len(scheduler.curis) == 1
    assert scheduler.curis[0]['u'] == target['u']
예제 #3
0
def testSeen(testdatadir, domaininfo, mapper, scheduler):
    """Check that a URI already passed to the seen check is not scheduled.

    After ``init_seen``, the URI is handed to ``seen.already_seen`` once
    (presumably registering it as seen -- confirm against the seen
    implementation), then written to the 'wide' incoming queue.
    ``processinq`` must count it as processed but neither scheduled,
    excluded nor saved, and the scheduler must stay empty.
    """
    dispatcher = LevelDispatcher(domaininfo, 'wide', mapper, scheduler)
    incoming = FileEnqueue(testdatadir.inqdir('wide'))

    seen_uri = {'u': 'http://test.example.com/2'}
    dispatcher.init_seen()
    dispatcher.seen.already_seen(seen_uri)

    incoming.queue([seen_uri])
    incoming.close()

    # Debugging hint: the queued data can be inspected by running zcat on
    # the *.gz files in the incoming queue directory
    # (e.g. /tmp/hq/wide/inq/).

    result = dispatcher.processinq(10)

    # Checked in this fixed order so the first failing counter is reported.
    expected_counts = (
        ('processed', 1),
        ('scheduled', 0),
        ('excluded', 0),
        ('saved', 0),
    )
    for counter, count in expected_counts:
        assert result[counter] == count, result

    assert len(scheduler.curis) == 0, scheduler.curis
예제 #4
0
def testOutOfScope(testdatadir, domaininfo, mapper, scheduler):
    """Check that a URI is saved to a diverter queue instead of scheduled.

    With ``scheduler._client_active`` set to False, one URI is written to
    the 'wide' incoming queue and ``processinq`` is run. The returned
    counters must show one processed and one saved item, with nothing
    scheduled or excluded. After shutdown, the diverter queue named '0'
    must contain exactly that URI.
    """
    dispatcher = LevelDispatcher(domaininfo, 'wide', mapper, scheduler)
    incoming = FileEnqueue(testdatadir.inqdir('wide'))

    target = {'u': 'http://test.example.com/'}
    scheduler._client_active = False

    incoming.queue([target])
    incoming.close()

    result = dispatcher.processinq(10)

    # Checked in this fixed order so the first failing counter is reported.
    expected_counts = (
        ('processed', 1),
        ('scheduled', 0),
        ('excluded', 0),
        ('saved', 1),
    )
    for counter, count in expected_counts:
        assert result[counter] == count, result

    dispatcher.shutdown()

    # Read back what was diverted into queue '0'.
    diverted_queue = dispatcher.diverter.getqueue('0')
    saved_items = readqueue(diverted_queue.qdir)
    assert len(saved_items) == 1, saved_items
    first = saved_items[0]
    assert isinstance(first, dict), first
    assert first['u'] == target['u']