async def handle_run(request):
    """Authenticate a crawl request and run it through NetBoy.

    The JSON body is expected to be an object with:

    * ``info`` -- NetBoy settings; its ``auth`` entry carries ``user`` and
      ``password``.
    * ``data`` -- the payload handed to ``NetBoy.run``.

    Responses:

    * 422 -- the body is not valid JSON, a required key is missing, or the
      credential lookup raised.
    * 403 -- the supplied user / hashed password do not match the stored ones.
    * 200 -- ``str()`` of whatever ``NetBoy.run`` returned.
    """
    # A malformed or undecodable body raises a ValueError subclass
    # (json.JSONDecodeError / UnicodeDecodeError).  Catch it here so the client
    # gets the same 422 as for any other bad input, rather than an unhandled
    # server error.  Only ValueError is caught around the await so that task
    # cancellation is never swallowed on older interpreters.
    try:
        body = await request.json()
    except ValueError as e:
        return _bad_params_response(e)

    try:
        info = body['info']
        auth = info.get('auth')
        data = body['data']
        user = auth.get('user')
        password = auth.get('password')
        hashed = User.hashit(password)
        udb = User.query(user=user)
        user_db = udb.get('user')
        password_db = udb.get('password')
    except Exception as e:
        # Deliberately broad: any failure while unpacking the request or
        # looking up the user is reported to the client as bad parameters.
        return _bad_params_response(e)

    if user != user_db or hashed != password_db:
        return web.Response(text='forbidden', status=403)

    boy = NetBoy()
    boy.use_info(info)
    # NOTE(review): run() is called without await; if it blocks for the whole
    # crawl it will stall the event loop -- confirm this is acceptable.
    result = boy.run(data)
    return web.Response(text=str(result), status=200)


def _bad_params_response(exc):
    """Build the 422 response describing *exc* for a rejected request."""
    text = 'bad params! type: ' + str(type(exc)) + ' desc: ' + str(exc)
    return web.Response(text=text, status=422)
def test_it(data):
    """Run *data* through a pycurl NetBoy in ``process`` mode.

    Only the ``url``, ``effect``, ``title`` and ``charset`` fields are
    requested from the filter.  Returns whatever ``NetBoy.run`` returns.
    """
    setup_log('netboy')

    crawler = NetBoy()
    wanted_fields = ['url', 'effect', 'title', 'charset']
    (
        crawler
        .use_spider('pycurl')
        .use_filter(wanted_fields)
        .use_mode('process')
        .use_timeout(10, 5, 5, 5)
        .use_workers(4, 2, 2)
    )
    return crawler.run(data)
def test_it(data):
    """Run *data* through a pycurl NetBoy in ``process`` mode with triggers.

    The ``trig_sub`` and ``trig_print`` triggers from
    ``test.functional.netboy.test_baidu_subdomain`` are registered by dotted
    path.  Returns whatever ``NetBoy.run`` returns.
    """
    setup_log('netboy')

    crawler = NetBoy()
    wanted_fields = ['url', 'title', 'effect', 'data']
    trigger_paths = [
        'test.functional.netboy.test_baidu_subdomain.trig_sub',
        'test.functional.netboy.test_baidu_subdomain.trig_print',
    ]
    (
        crawler
        .use_spider('pycurl')
        .use_filter(wanted_fields)
        .use_triggers(trigger_paths)
        .use_mode('process')
        .use_timeout(10, 5, 5, 5)
        .use_workers(4, 2, 2)
    )
    return crawler.run(data)
def test_it(data):
    """Run *data* through a pycurl NetBoy in ``thread`` mode with triggers.

    ``use_workers()`` is called with no arguments, and the two redirect
    triggers (``trigger_it`` / ``trigger_it2``) are registered by dotted path.
    Returns whatever ``NetBoy.run`` returns.
    """
    setup_log('netboy')

    crawler = NetBoy()
    wanted_fields = ['url', 'title', 'effect', 'data']
    trigger_paths = [
        'test.functional.netboy.test_thread_pycurl_trigger_redirect.trigger_it',
        'test.functional.netboy.test_thread_pycurl_trigger_redirect.trigger_it2',
    ]
    (
        crawler
        .use_spider('pycurl')
        .use_filter(wanted_fields)
        .use_workers()
        .use_triggers(trigger_paths)
        .use_mode('thread')
        .use_timeout(15, 5)
    )
    return crawler.run(data)
def test_it(data):
    """Run *data* through a pycurl NetBoy in ``thread`` mode with one trigger.

    Registers ``test.functional.netboy.test_thread_pycurl_basic.trig_it`` and
    returns whatever ``NetBoy.run`` returns.
    """
    setup_log('netboy')

    crawler = NetBoy()
    wanted_fields = ['url', 'title', 'effect']
    trigger_paths = ['test.functional.netboy.test_thread_pycurl_basic.trig_it']
    # No queue is selected here; a ``.use_queue('worker')`` call is
    # intentionally left out of the chain.
    (
        crawler
        .use_spider('pycurl')
        .use_filter(wanted_fields)
        .use_triggers(trigger_paths)
        .use_mode('thread')
        .use_timeout(10, 5, 5, 5)
        .use_workers(4, 2, 2)
    )
    return crawler.run(data)
def test_it(data):
    """Run *data* through a pycurl NetBoy in ``process`` mode with a cookie.

    The cookie string ``t1=v1;t2=v2`` is UTF-8 encoded and stored under
    ``info['cookie']`` before the run.  Returns whatever ``NetBoy.run``
    returns.
    """
    setup_log('netboy')

    crawler = NetBoy()
    crawler.use_triggers(
        ['test.functional.netboy.test_pycurl_with_cookie.trig_it'])

    wanted_fields = ['url', 'effect', 'title', 'cookie']
    (
        crawler
        .use_spider('pycurl')
        .use_filter(wanted_fields)
        .use_mode('process')
        .use_timeout(10, 5, 5, 5)
        .use_workers(1, 1, 1)
    )

    # The cookie is stored as bytes, not str.
    cookie_bytes = 't1=v1;t2=v2'.encode("utf8")
    crawler.info['cookie'] = cookie_bytes
    return crawler.run(data)
def test_it(data):
    """Run *data* through a pycurl NetBoy in ``process`` mode, keeping cookies.

    Registers ``test.functional.netboy.test_pycurl_server_set_cookie.trig_it``
    and requests the ``cookie`` field alongside ``url``, ``effect`` and
    ``title``.  Returns whatever ``NetBoy.run`` returns.
    """
    setup_log('netboy')

    crawler = NetBoy()
    crawler.use_triggers(
        ['test.functional.netboy.test_pycurl_server_set_cookie.trig_it'])

    wanted_fields = ['url', 'effect', 'title', 'cookie']
    (
        crawler
        .use_spider('pycurl')
        .use_filter(wanted_fields)
        .use_mode('process')
        .use_timeout(10, 5, 5, 5)
        .use_workers(1, 1, 1)
    )
    return crawler.run(data)
def test_it(data):
    """Run *data* through a pycurl NetBoy in ``celery`` mode on queue ``worker``.

    Registers the ``trig_sub`` / ``trig_print`` triggers and the ``final``
    hook from ``test.functional.netboy.test_baidu_subdomain_location``, and
    sets ``info['job_id']`` to ``'test'`` before running.  Returns whatever
    ``NetBoy.run`` returns.
    """
    setup_log('netboy')

    crawler = NetBoy()
    wanted_fields = ['url', 'title', 'effect', 'data']
    trigger_paths = [
        'test.functional.netboy.test_baidu_subdomain_location.trig_sub',
        'test.functional.netboy.test_baidu_subdomain_location.trig_print',
    ]
    (
        crawler
        .use_spider('pycurl')
        .use_filter(wanted_fields)
        .use_triggers(trigger_paths)
        .use_mode('celery')
        .use_timeout(10, 5, 5, 5)
        .use_workers(8, 2, 2)
        .use_queue('worker')
        .use_final('test.functional.netboy.test_baidu_subdomain_location.final')
    )
    crawler.info['job_id'] = 'test'
    return crawler.run(data)
def trig_sub(payload, response):
    """Fetch every ``h3 a`` link found in ``response['data']``.

    The hrefs are crawled with a thread-mode pycurl NetBoy (``maxredirs`` set
    to 2).  For each fetched item the ``effect`` value is collected when its
    ``state`` is ``'normal'``; otherwise the literal ``'error'`` is recorded.
    The resulting list is stored under ``'urls'`` in *response*, which is
    updated in place and returned.  *payload* is not used.
    """
    soup = BeautifulSoup(response.get('data'), 'html.parser')
    hrefs = [anchor['href'] for anchor in soup.select('h3 a')]

    fetcher = NetBoy()
    (
        fetcher
        .use_spider('pycurl')
        .use_filter(['effect'])
        .use_mode('thread')
        .use_timeout(10, 5, 5, 5)
        .use_workers(4, 5, 5)
    )
    fetcher.info['maxredirs'] = 2

    # run() yields groups of results; flatten them while mapping each item.
    urls = []
    for batch in fetcher.run(hrefs):
        for item in batch:
            if item.get('state') == 'normal':
                urls.append(item['effect'])
            else:
                urls.append('error')

    response.update({'urls': urls})
    return response
def trig_sub(payload, response):
    """Collect ``location`` header values for every ``h3 a`` link in the page.

    The hrefs parsed from ``response['data']`` are requested with a
    thread-mode pycurl NetBoy configured with ``maxredirs = 1`` and
    ``followlocation = 0`` (presumably so redirects are reported instead of
    followed -- confirm against the spider).  For each header entry that has
    a truthy ``location``, its first element is collected.  The list is
    stored under ``'urls'`` in *response*, which is updated in place and
    returned.  *payload* is not used.
    """
    soup = BeautifulSoup(response.get('data'), 'html.parser')
    hrefs = [anchor['href'] for anchor in soup.select('h3 a')]

    fetcher = NetBoy()
    (
        fetcher
        .use_spider('pycurl')
        .use_filter(['header'])
        .use_mode('thread')
        .use_timeout(10, 5, 5, 5)
        .use_workers(8, 2, 2)
    )
    fetcher.info['maxredirs'] = 1
    fetcher.info['followlocation'] = 0

    # run() yields groups of results; each result's header content is a
    # sequence of mappings that may carry a 'location' entry.
    urls = [
        entry.get('location')[0]
        for batch in fetcher.run(hrefs)
        for item in batch
        for entry in item['header']['content']
        if entry.get('location')
    ]

    response.update({'urls': urls})
    return response
# "http://www.hxsjjq.gov.cn", "http://www.douban.com" # "http://www.bing.com", # "http://www.hbcs.gov.cn", # "http://www.hnxcdj.com", # "http://www.ryzj.gov.cn", # "http://www.lhsajj.com", # "http://www.nysylj.com", # "http://www.nyszglc.com", # "http://www.ayjtj.gov.cn", # "http://www.hbxgtzyj.gov.cn", # "http://www.hnrdia.com", # "http://www.zmdggjy.com", # "http://www.xyxgtzyj.gov.cn", # "http://www.xxdsyjs.com", # "http://www.xixiaagri.gov.cn", # "http://www.xmdj.gov.cn", # "http://www.xysrsjzlzpksbmw.gov.cn", ] * 1 # boy = NetBoy({'mode': 'coroutine'}) # boy = NetBoy({'mode':'process'}) boy = NetBoy() boy.use_queue('worker').use_spider('chrome').use_filter( ['url', 'title', 'screen']).use_workers().use_triggers([ 'test.functional.netboy.screen.print_screen' # ]).use_mode('celery') ]).use_mode('thread') #.use_socks5_proxy('127.0.0.1:1082') resp = boy.run(data) # print(resp[0].get('screen')[:100])
import logging

from netboy.netboy import NetBoy
from netboy.util.setup_log import setup_log


def print_screen(payload, reponse):
    """Log the first 100 items of ``reponse['screen']``.

    The logger is looked up by the name stored under ``payload['log']``.
    """
    logging.getLogger(payload.get('log')).info(reponse['screen'][:100])


if __name__ == '__main__':
    setup_log('netboy')

    # Raise the multiplier to submit the same URL several times.
    data = ['http://www.bing.com'] * 1

    boy = NetBoy()
    (
        boy
        .use_queue('worker')
        .use_spider('chrome')
        .use_filter(['url', 'title', 'screen'])
        .use_auth('dameng', 'hello')
    )

    # Submit the job to the remote runner; callback_data is passed along
    # with the request.
    resp = boy.run_remote(
        '127.0.0.1:8080/run',
        data,
        callback_data={
            'url': '127.0.0.1:8080/post',
            'hello': 'world',
        },
    )
import logging

from netboy.netboy import NetBoy
from netboy.util.setup_log import setup_log


def print_screen(payload, reponse):
    """Log the first 100 items of ``reponse['screen']``.

    The logger is looked up by the name stored under ``payload['log']``.
    """
    logging.getLogger(payload.get('log')).info(reponse['screen'][:100])


if __name__ == '__main__':
    setup_log('netboy')

    boy = NetBoy()
    # Register the credentials with the remote endpoint.
    resp = boy.register_remote(
        '127.0.0.1:8080/register',
        user='******',
        password='******',
    )
if response.get('screen'): print('title', response.get('title')) # print('data', response.get('data')) screen = response['screen'] # with open('test.png', 'wb') as f: # f.write(screen) screen_100 = screen[:100] log.info('screen:' + screen_100) else: print(response) if __name__ == '__main__': print('haha') setup_log('netboy') data = [ "http://ssqgz.30edu.com.cn/", "http://rneg.30edu.com.cn/", "http://tkxjytyj.30edu.com.cn" ] * 1 # boy = NetBoy({'mode': 'coroutine'}) # boy = NetBoy({'mode':'process'}) boy = NetBoy() boy.use_queue('worker').use_spider('chrome').use_filter( ['url', 'title', 'screen', 'data']).use_workers().use_triggers([ 'test.functional.netboy.test_chrome_edu30.print_screen' # ]).use_mode('celery') ]).use_mode('thread').use_timeout(30, 30, 30, 30) resp = boy.run(data) # print(resp[0].get('screen')[:100])
results['headers'] = { k: v for k, v in response.headers.items() } if 'cookies' in filter: results['cookies'] = { k: v for k, v in response.cookies.items() } if 'effect' in filter: results['effect'] = response.real_url if 'code' in filter: results['code'] = response.status if 'method' in filter: results['method'] = response.method return results if __name__ == '__main__': info = {'chunk_size': 1} data = ['http://www.baidu.com', 'http://www.bing.com'] #, 'http://www.google.com' ] # f = AIOHttpFactory(data, info) # f.run() boy = NetBoy(info=info) boy.use_mode('coroutine').use_spider('aiohttp').use_filter( ['title', 'url', 'title', 'code', 'time']) resp = boy.run(data) # print(resp.keys()) print(json.dumps(resp, indent=2, ensure_ascii=False))