Ejemplo n.º 1
0
def test_it(data):
    """Run a process-mode pycurl NetBoy job over ``data`` and return its result.

    One trigger (``test_pycurl_get_header.trig_it``) is registered, and the
    filter list passed to NetBoy is url, effect, title, header and cookie.
    """
    setup_log('netboy')

    trigger_paths = ['test.functional.netboy.test_pycurl_get_header.trig_it']
    filter_names = ['url', 'effect', 'title', 'header', 'cookie']

    boy = NetBoy()
    boy.use_triggers(trigger_paths)
    (
        boy.use_spider('pycurl')
        .use_filter(filter_names)
        .use_mode('process')
        .use_timeout(10, 5, 5, 5)
        .use_workers(1, 1, 1)
    )
    # NOTE: an assignment to boy.info['cookie'] used to sit here, disabled.
    return boy.run(data)
Ejemplo n.º 2
0
def test_it(data):
    """Run a process-mode pycurl NetBoy job over ``data`` and return its result.

    The filter list passed to NetBoy is url, effect, title and charset; no
    triggers are registered.
    """
    setup_log('netboy')

    filter_names = ['url', 'effect', 'title', 'charset']

    boy = NetBoy()
    (
        boy.use_spider('pycurl')
        .use_filter(filter_names)
        .use_mode('process')
        .use_timeout(10, 5, 5, 5)
        .use_workers(4, 2, 2)
    )
    return boy.run(data)
Ejemplo n.º 3
0
def test_it(data):
    """Run a process-mode pycurl NetBoy job over ``data`` and return its result.

    Registers the ``trig_sub`` and ``trig_print`` triggers from
    ``test_baidu_subdomain`` and passes url, title, effect and data as the
    filter list.
    """
    setup_log('netboy')

    filter_names = ['url', 'title', 'effect', 'data']
    trigger_paths = [
        'test.functional.netboy.test_baidu_subdomain.trig_sub',
        'test.functional.netboy.test_baidu_subdomain.trig_print',
    ]

    boy = NetBoy()
    (
        boy.use_spider('pycurl')
        .use_filter(filter_names)
        .use_triggers(trigger_paths)
        .use_mode('process')
        .use_timeout(10, 5, 5, 5)
        .use_workers(4, 2, 2)
    )
    return boy.run(data)
Ejemplo n.º 4
0
def test_it(data):
    """Run a process-mode pycurl NetBoy job over ``data`` and return its result.

    One trigger (``test_pycurl_server_set_cookie.trig_it``) is registered, and
    the filter list passed to NetBoy is url, effect, title and cookie.
    """
    setup_log('netboy')

    trigger_paths = [
        'test.functional.netboy.test_pycurl_server_set_cookie.trig_it',
    ]
    filter_names = ['url', 'effect', 'title', 'cookie']

    boy = NetBoy()
    boy.use_triggers(trigger_paths)
    (
        boy.use_spider('pycurl')
        .use_filter(filter_names)
        .use_mode('process')
        .use_timeout(10, 5, 5, 5)
        .use_workers(1, 1, 1)
    )
    return boy.run(data)
def test_it(data):
    """Run a thread-mode pycurl NetBoy job over ``data`` and return its result.

    Registers the ``trigger_it`` and ``trigger_it2`` triggers from
    ``test_thread_pycurl_trigger_redirect``, passes url, title, effect and
    data as the filter list, and calls ``use_workers`` with no arguments.
    """
    setup_log('netboy')

    filter_names = ['url', 'title', 'effect', 'data']
    trigger_paths = [
        'test.functional.netboy.test_thread_pycurl_trigger_redirect.trigger_it',
        'test.functional.netboy.test_thread_pycurl_trigger_redirect.trigger_it2',
    ]

    boy = NetBoy()
    (
        boy.use_spider('pycurl')
        .use_filter(filter_names)
        .use_workers()
        .use_triggers(trigger_paths)
        .use_mode('thread')
        .use_timeout(15, 5)
    )
    return boy.run(data)
Ejemplo n.º 6
0
def test_it(data):
    """Run a thread-mode pycurl NetBoy job over ``data`` and return its result.

    One trigger (``test_thread_pycurl_basic.trig_it``) is registered, and the
    filter list passed to NetBoy is url, title and effect.
    """
    setup_log('netboy')

    filter_names = ['url', 'title', 'effect']
    trigger_paths = ['test.functional.netboy.test_thread_pycurl_basic.trig_it']

    boy = NetBoy()
    (
        boy.use_spider('pycurl')
        .use_filter(filter_names)
        .use_triggers(trigger_paths)
        .use_mode('thread')
        .use_timeout(10, 5, 5, 5)
        .use_workers(4, 2, 2)
    )
    # NOTE: a trailing .use_queue('worker') call was disabled in this chain.
    return boy.run(data)
Ejemplo n.º 7
0
def test_it(data):
    """Run a celery-mode pycurl NetBoy job over ``data`` and return its result.

    Registers the ``trig_sub`` and ``trig_print`` triggers and the ``final``
    hook from ``test_baidu_subdomain_location``, selects the ``'worker'``
    queue, and sets ``boy.info['job_id']`` to ``'test'`` before running.
    """
    setup_log('netboy')

    filter_names = ['url', 'title', 'effect', 'data']
    trigger_paths = [
        'test.functional.netboy.test_baidu_subdomain_location.trig_sub',
        'test.functional.netboy.test_baidu_subdomain_location.trig_print',
    ]
    final_path = 'test.functional.netboy.test_baidu_subdomain_location.final'

    boy = NetBoy()
    (
        boy.use_spider('pycurl')
        .use_filter(filter_names)
        .use_triggers(trigger_paths)
        .use_mode('celery')
        .use_timeout(10, 5, 5, 5)
        .use_workers(8, 2, 2)
        .use_queue('worker')
        .use_final(final_path)
    )
    boy.info['job_id'] = 'test'
    return boy.run(data)
Ejemplo n.º 8
0
def trig_sub(payload, response):
    """Fetch the links found in a page and record the outcome on ``response``.

    Parses ``response['data']`` as HTML, collects the ``href`` of every
    ``h3 a`` element, fetches those links with a nested thread-mode NetBoy
    job (``maxredirs`` set to 2) and stores the outcome under
    ``response['urls']``.

    Args:
        payload: Not read by this function.
            NOTE(review): presumably required by the trigger call signature
            -- confirm against the netboy trigger dispatcher.
        response: Mapping for the fetched page; ``'data'`` is read and
            ``'urls'`` is written.

    Returns:
        The same ``response`` object, updated in place. ``'urls'`` holds, for
        every nested result, its ``'effect'`` value when its ``'state'`` is
        ``'normal'`` and the string ``'error'`` otherwise.
    """
    # A response without a body would hand None to BeautifulSoup, which
    # expects markup; treat a missing body as an empty document instead.
    markup = response.get('data') or ''
    soup = BeautifulSoup(markup, 'html.parser')

    # Subscripting a tag for a missing attribute raises KeyError, so anchors
    # without an href are skipped rather than aborting the whole trigger.
    links = [a['href'] for a in soup.select('h3 a') if a.has_attr('href')]

    boy = NetBoy()
    (
        boy.use_spider('pycurl')
        .use_filter(['effect'])
        .use_mode('thread')
        .use_timeout(10, 5, 5, 5)
        .use_workers(4, 5, 5)
    )
    boy.info['maxredirs'] = 2
    resps = boy.run(links)

    urls = [
        e['effect'] if e.get('state') == 'normal' else 'error'
        for resp in resps
        for e in resp
    ]

    response.update({'urls': urls})
    return response
Ejemplo n.º 9
0
def trig_sub(payload, response):
    """Collect redirect targets for the links found in a page.

    Parses ``response['data']`` as HTML, collects the ``href`` of every
    ``h3 a`` element and fetches those links with a nested thread-mode NetBoy
    job configured with ``maxredirs = 1`` and ``followlocation = 0``. The
    first ``location`` value of each header entry that has one is stored
    under ``response['urls']``.

    Args:
        payload: Not read by this function.
            NOTE(review): presumably required by the trigger call signature
            -- confirm against the netboy trigger dispatcher.
        response: Mapping for the fetched page; ``'data'`` is read and
            ``'urls'`` is written.

    Returns:
        The same ``response`` object, updated in place with a ``'urls'`` list.
    """
    # A response without a body would hand None to BeautifulSoup, which
    # expects markup; treat a missing body as an empty document instead.
    markup = response.get('data') or ''
    soup = BeautifulSoup(markup, 'html.parser')

    # Subscripting a tag for a missing attribute raises KeyError, so anchors
    # without an href are skipped rather than aborting the whole trigger.
    links = [a['href'] for a in soup.select('h3 a') if a.has_attr('href')]

    boy = NetBoy()
    (
        boy.use_spider('pycurl')
        .use_filter(['header'])
        .use_mode('thread')
        .use_timeout(10, 5, 5, 5)
        .use_workers(8, 2, 2)
    )
    boy.info['maxredirs'] = 1
    boy.info['followlocation'] = 0

    urls = []
    for resp in boy.run(links):
        for r in resp:
            # Results that carry no header (presumably failed fetches --
            # confirm against the spider's error shape) contribute nothing
            # instead of raising KeyError/TypeError.
            header = r.get('header') or {}
            for entry in header.get('content') or []:
                location = entry.get('location')
                if location:
                    urls.append(location[0])

    response.update({'urls': urls})
    return response