async def handle_run(request):
    """Authenticate a JSON request and run a NetBoy job on its payload.

    The JSON body is expected to carry ``info`` (whose ``auth`` entry holds
    ``user`` and ``password``) and ``data``.

    Returns a ``web.Response`` with status:
        422 -- any exception was raised while reading the parameters or
               looking up the user; the body names the exception type
               and message.
        403 -- the supplied user / hashed password do not equal the values
               returned by ``User.query``.
        200 -- credentials matched; the body is ``str()`` of the value
               returned by ``NetBoy.run``.
    """
    body = await request.json()

    try:
        info = body['info']
        auth = info.get('auth')
        data = body['data']
        user = auth.get('user')
        password = auth.get('password')
        hashed = User.hashit(password)
        record = User.query(user=user)
        stored_user = record.get('user')
        stored_hash = record.get('password')
    except Exception as e:
        # Any failure above (missing key, ``None`` auth/record, ...) is
        # reported to the client as a parameter error.
        message = f'bad params! type: {type(e)!s} desc: {e!s}'
        return web.Response(text=message, status=422)

    # NOTE(review): hashes are compared with plain ``==``; consider a
    # constant-time comparison once the hash type is confirmed.
    if not (user == stored_user and hashed == stored_hash):
        return web.Response(text='forbidden', status=403)

    boy = NetBoy()
    boy.use_info(info)
    # NOTE(review): ``run`` is called synchronously inside this coroutine;
    # presumably it blocks until the job finishes -- confirm.
    result = boy.run(data)
    return web.Response(text=str(result), status=200)
def test_it(data):
    """Run ``data`` through a NetBoy configured for pycurl in process mode.

    Sets up the 'netboy' log, configures the spider, filter fields, mode,
    timeouts and workers, then returns whatever ``NetBoy.run`` returns.
    """
    setup_log('netboy')
    filter_fields = ['url', 'effect', 'title', 'charset']

    boy = NetBoy()
    (
        boy.use_spider('pycurl')
        .use_filter(filter_fields)
        .use_mode('process')
        .use_timeout(10, 5, 5, 5)
        .use_workers(4, 2, 2)
    )
    return boy.run(data)
def test_it(data):
    """Run ``data`` through a process-mode pycurl NetBoy with one trigger.

    The trigger is registered by dotted path before the spider, filter
    fields (including 'header' and 'cookie'), mode, timeouts and workers
    are configured. Returns whatever ``NetBoy.run`` returns.
    """
    setup_log('netboy')
    trigger_paths = ['test.functional.netboy.test_pycurl_get_header.trig_it']
    filter_fields = ['url', 'effect', 'title', 'header', 'cookie']

    boy = NetBoy()
    boy.use_triggers(trigger_paths)
    (
        boy.use_spider('pycurl')
        .use_filter(filter_fields)
        .use_mode('process')
        .use_timeout(10, 5, 5, 5)
        .use_workers(1, 1, 1)
    )
    # Disabled: boy.info['cookie'] = bytes('test=value',"utf8")
    return boy.run(data)
def test_it(data):
    """Run ``data`` through a process-mode pycurl NetBoy with two triggers.

    Configures the spider, filter fields, the ``trig_sub`` / ``trig_print``
    triggers (by dotted path), mode, timeouts and workers, then returns
    whatever ``NetBoy.run`` returns.
    """
    setup_log('netboy')
    filter_fields = ['url', 'title', 'effect', 'data']
    trigger_paths = [
        'test.functional.netboy.test_baidu_subdomain.trig_sub',
        'test.functional.netboy.test_baidu_subdomain.trig_print',
    ]

    boy = NetBoy()
    (
        boy.use_spider('pycurl')
        .use_filter(filter_fields)
        .use_triggers(trigger_paths)
        .use_mode('process')
        .use_timeout(10, 5, 5, 5)
        .use_workers(4, 2, 2)
    )
    return boy.run(data)
def test_it(data):
    """Run ``data`` through a process-mode pycurl NetBoy with one trigger.

    The trigger is registered by dotted path first; the spider, filter
    fields (including 'cookie'), mode, timeouts and workers follow.
    Returns whatever ``NetBoy.run`` returns.
    """
    setup_log('netboy')
    trigger_paths = [
        'test.functional.netboy.test_pycurl_server_set_cookie.trig_it',
    ]
    filter_fields = ['url', 'effect', 'title', 'cookie']

    boy = NetBoy()
    boy.use_triggers(trigger_paths)
    (
        boy.use_spider('pycurl')
        .use_filter(filter_fields)
        .use_mode('process')
        .use_timeout(10, 5, 5, 5)
        .use_workers(1, 1, 1)
    )
    return boy.run(data)
def test_it(data):
    """Run ``data`` through a thread-mode pycurl NetBoy with two triggers.

    Workers are configured with no arguments, the two triggers are
    registered by dotted path, and a two-value timeout is set. Returns
    whatever ``NetBoy.run`` returns.
    """
    setup_log('netboy')
    filter_fields = ['url', 'title', 'effect', 'data']
    trigger_paths = [
        'test.functional.netboy.test_thread_pycurl_trigger_redirect.trigger_it',
        'test.functional.netboy.test_thread_pycurl_trigger_redirect.trigger_it2',
    ]

    boy = NetBoy()
    (
        boy.use_spider('pycurl')
        .use_filter(filter_fields)
        .use_workers()
        .use_triggers(trigger_paths)
        .use_mode('thread')
        .use_timeout(15, 5)
    )
    return boy.run(data)
def test_it(data):
    """Run ``data`` through a thread-mode pycurl NetBoy with one trigger.

    Configures the spider, filter fields, trigger (by dotted path), mode,
    timeouts and workers, then returns whatever ``NetBoy.run`` returns.
    """
    setup_log('netboy')
    filter_fields = ['url', 'title', 'effect']
    trigger_paths = ['test.functional.netboy.test_thread_pycurl_basic.trig_it']

    boy = NetBoy()
    (
        boy.use_spider('pycurl')
        .use_filter(filter_fields)
        .use_triggers(trigger_paths)
        .use_mode('thread')
        .use_timeout(10, 5, 5, 5)
        .use_workers(4, 2, 2)
        # Disabled: .use_queue('worker')
    )
    return boy.run(data)
def test_it(data):
    """Run ``data`` through a celery-mode pycurl NetBoy on the 'worker' queue.

    Registers the ``trig_sub`` / ``trig_print`` triggers and a final
    callback by dotted path, sets ``info['job_id']`` to 'test', and returns
    whatever ``NetBoy.run`` returns.
    """
    setup_log('netboy')
    filter_fields = ['url', 'title', 'effect', 'data']
    trigger_paths = [
        'test.functional.netboy.test_baidu_subdomain_location.trig_sub',
        'test.functional.netboy.test_baidu_subdomain_location.trig_print',
    ]
    final_path = 'test.functional.netboy.test_baidu_subdomain_location.final'

    boy = NetBoy()
    (
        boy.use_spider('pycurl')
        .use_filter(filter_fields)
        .use_triggers(trigger_paths)
        .use_mode('celery')
        .use_timeout(10, 5, 5, 5)
        .use_workers(8, 2, 2)
        .use_queue('worker')
        .use_final(final_path)
    )
    boy.info['job_id'] = 'test'
    return boy.run(data)
def trig_sub(payload, response):
    """Fetch every ``<h3> <a>`` link in the page and record where it lands.

    Parses ``response['data']`` as HTML, collects the ``href`` of each
    anchor nested in an ``<h3>``, runs those links through a thread-mode
    pycurl NetBoy (``maxredirs`` = 2) and stores the outcome under
    ``response['urls']``: the ``'effect'`` value for results whose
    ``'state'`` is ``'normal'``, otherwise the string ``'error'``.

    Args:
        payload: Unused here; part of the trigger signature.
        response: Mapping holding the fetched page under ``'data'``;
            updated in place.

    Returns:
        The same ``response`` object, with ``'urls'`` added.
    """
    soup = BeautifulSoup(response.get('data'), 'html.parser')
    # Anchors without an href would raise KeyError on subscript access and
    # abort the whole trigger, so they are skipped.
    links = [
        anchor['href']
        for anchor in soup.select('h3 a')
        if anchor.has_attr('href')
    ]

    boy = NetBoy()
    (
        boy.use_spider('pycurl')
        .use_filter(['effect'])
        .use_mode('thread')
        .use_timeout(10, 5, 5, 5)
        .use_workers(4, 5, 5)
    )
    boy.info['maxredirs'] = 2

    batches = boy.run(links)
    # ``run`` yields an iterable of iterables of per-link result mappings.
    urls = [
        item['effect'] if item.get('state') == 'normal' else 'error'
        for batch in batches
        for item in batch
    ]
    response.update({'urls': urls})
    return response
def trig_sub(payload, response):
    """Collect the ``Location`` targets of every ``<h3> <a>`` link in the page.

    Parses ``response['data']`` as HTML, collects the ``href`` of each
    anchor nested in an ``<h3>``, requests those links through a
    thread-mode pycurl NetBoy with ``followlocation`` set to 0, and stores
    the first ``'location'`` value of each header entry that has one under
    ``response['urls']``.

    Args:
        payload: Unused here; part of the trigger signature.
        response: Mapping holding the fetched page under ``'data'``;
            updated in place.

    Returns:
        The same ``response`` object, with ``'urls'`` added.
    """
    soup = BeautifulSoup(response.get('data'), 'html.parser')
    # Anchors without an href would raise KeyError on subscript access and
    # abort the whole trigger, so they are skipped.
    links = [
        anchor['href']
        for anchor in soup.select('h3 a')
        if anchor.has_attr('href')
    ]

    boy = NetBoy()
    (
        boy.use_spider('pycurl')
        .use_filter(['header'])
        .use_mode('thread')
        .use_timeout(10, 5, 5, 5)
        .use_workers(8, 2, 2)
    )
    boy.info['maxredirs'] = 1
    boy.info['followlocation'] = 0

    urls = []
    for batch in boy.run(links):
        for result in batch:
            # A result without header content (presumably a failed fetch --
            # confirm against the spider) is skipped instead of raising
            # KeyError and discarding the locations already collected.
            entries = (result.get('header') or {}).get('content') or []
            for entry in entries:
                location = entry.get('location')
                if location:
                    urls.append(location[0])

    response.update({'urls': urls})
    return response
# "http://www.hxsjjq.gov.cn", "http://www.douban.com" # "http://www.bing.com", # "http://www.hbcs.gov.cn", # "http://www.hnxcdj.com", # "http://www.ryzj.gov.cn", # "http://www.lhsajj.com", # "http://www.nysylj.com", # "http://www.nyszglc.com", # "http://www.ayjtj.gov.cn", # "http://www.hbxgtzyj.gov.cn", # "http://www.hnrdia.com", # "http://www.zmdggjy.com", # "http://www.xyxgtzyj.gov.cn", # "http://www.xxdsyjs.com", # "http://www.xixiaagri.gov.cn", # "http://www.xmdj.gov.cn", # "http://www.xysrsjzlzpksbmw.gov.cn", ] * 1 # boy = NetBoy({'mode': 'coroutine'}) # boy = NetBoy({'mode':'process'}) boy = NetBoy() boy.use_queue('worker').use_spider('chrome').use_filter( ['url', 'title', 'screen']).use_workers().use_triggers([ 'test.functional.netboy.screen.print_screen' # ]).use_mode('celery') ]).use_mode('thread') #.use_socks5_proxy('127.0.0.1:1082') resp = boy.run(data) # print(resp[0].get('screen')[:100])