def __init__(self):
    # path = "C:\Program Files (x86)\Google\Chrome\chromedriver"
    # self.driver = webdriver.Chrome(executable_path=path)
    self.offset_list = []
    self.data = []
    self.proxy_list = []

    '''logging module'''
    self.logger = logging.getLogger('mylogger')
    self.logger.setLevel(logging.DEBUG)

    fh = logging.FileHandler('debug.log')
    ch = logging.StreamHandler()
    ch.setLevel(logging.INFO)

    formatter = logging.Formatter("%(message)s")
    fh.setFormatter(formatter)
    formatter = logging.Formatter(
        "%(module)s:%(lineno)s %(funcName)s %(message)s")
    ch.setFormatter(formatter)

    self.logger.addHandler(fh)
    self.logger.addHandler(ch)

    logging.getLogger("requests").setLevel(logging.WARNING)

    self.proxy_spider = Proxy()
    self.update_proxy()
    self.import_proxy()
    self.update_driver()
def parse_page(self, response):
    self.write(response.body)

    sel = Selector(response)
    infos = sel.xpath('//table[@class="bg"]/tr').extract()
    for i, info in enumerate(infos):
        if i == 0:
            continue

        val = Selector(text=info)
        ip = val.xpath('//td[2]/text()').extract_first()
        port = val.xpath('//td[3]/text()').extract_first()
        country = val.xpath('//td[5]/text()').extract_first()
        anonymity = val.xpath('//td[4]/text()').extract_first()

        proxy = Proxy()
        proxy.set_value(
            ip=ip,
            port=port,
            country=country,
            anonymity=anonymity,
            source=self.name,
        )
        self.add_proxy(proxy=proxy)
def start():
    from Project import Project
    import Record

    project = Project()
    record = Record.Record()

    Proxy.begin_catch(
        # callback = record.add_hit,
        callback=callback,
        filter=lambda x: True,
        hittype=HitData,
    )
    proxy = Proxy.thread_start()

    while True:
        c = raw_input('Enter stop to stop > ')
        if c == 'stop':
            break

    Proxy.stop()
    proxy.join()
    print 'Recording finished'
    print
def _item_info_update(self, items):
    column_id = items[0]
    item_id = items[1]
    item_id = str(item_id)
    sq = Sql()
    pr = Proxy()
    if PROXY_CRAWL == 1:  # Using free proxy pool
        while True:
            proxy_info = pr.get_proxy()  # tuple: header, proxy
            cr = Crawler(proxy_info[1])
            item_info = cr.get_jd_item(item_id)
            if item_info:  # name, price, subtitle, plus_price
                sq.update_item_name(column_id, item_info[0])
                sq.update_item_price(column_id, item_info[1])
                sq.update_item_subtitle(column_id, item_info[2])
                sq.update_item_plus_price(column_id, item_info[3])
                cr = Crawler(proxy_info[1])  # MUST create new instance otherwise got error
                huihui_info = cr.get_huihui_item(item_id)
                if huihui_info:  # skip this if not crawled
                    sq.update_item_max_price(column_id, huihui_info[0])
                    sq.update_item_min_price(column_id, huihui_info[1])
                break
    elif PROXY_CRAWL == 2:  # Using zhima proxy
        while True:
            if not self.proxy_info_zhima:
                self.proxy_info_zhima = pr.get_proxy_zhima()
                logging.info('Zhima proxy: %s', self.proxy_info_zhima[1])
            cr = Crawler(self.proxy_info_zhima[1])
            item_info = cr.get_jd_item(item_id)
            if not item_info:
                self.proxy_info_zhima = ()
                logging.warning('Zhima proxy crawl failure, changing proxy...')
                time.sleep(5)
                continue
            else:
                sq.update_item_name(column_id, item_info[0])
                sq.update_item_price(column_id, item_info[1])
                sq.update_item_subtitle(column_id, item_info[2])
                sq.update_item_plus_price(column_id, item_info[3])
                cr = Crawler(self.proxy_info_zhima[1])  # MUST create new instance otherwise got error
                huihui_info = cr.get_huihui_item(item_id)
                if huihui_info:  # skip this if not crawled
                    sq.update_item_max_price(column_id, huihui_info[0])
                    sq.update_item_min_price(column_id, huihui_info[1])
                break
    else:  # Using local ip
        cr = Crawler()
        item_info = cr.get_jd_item(item_id)
        sq.update_item_name(column_id, item_info[0])
        sq.update_item_price(column_id, item_info[1])
        sq.update_item_subtitle(column_id, item_info[2])
        sq.update_item_plus_price(column_id, item_info[3])
        cr = Crawler()  # MUST create new instance otherwise got error
        huihui_info = cr.get_huihui_item(item_id)
        sq.update_item_max_price(column_id, huihui_info[0])
        sq.update_item_min_price(column_id, huihui_info[1])
    return item_info
def parse_page(self, response):
    pattern = re.compile('<tr class=(.*?)</tr>', re.S)
    items = re.findall(pattern=pattern, string=response.body)
    for i, item in enumerate(items):
        if i > 0:
            if 'async' in item:
                continue

            ip_pattern = re.compile('IPDecode\(\"(.*?)\"\)', re.S)
            ip_decode = re.findall(ip_pattern, item)[0]
            ip_url = urllib.unquote(ip_decode)
            ip_soup = BeautifulSoup(ip_url, 'lxml')
            ip = ip_soup.text.encode()

            item = '<tr class=' + item + '</tr>'
            soup = BeautifulSoup(item, 'lxml')
            tbodys = soup.find_all('td')

            proxy = Proxy()
            proxy.set_value(
                ip=ip,
                port=tbodys[1].text.encode(),
                country=tbodys[4].text.encode(),
                anonymity=tbodys[3].text.encode(),
                source=self.name,
            )
            self.add_proxy(proxy=proxy)
def post(self, orgname):
    permission = AdministerOrganizationPermission(orgname)
    if not permission.can():
        raise Unauthorized()

    try:
        model.proxy_cache.get_proxy_cache_config_for_org(orgname)
        request_error("Proxy Cache Configuration already exists")
    except model.InvalidProxyCacheConfigException:
        pass

    data = request.get_json()
    # filter None values
    data = {k: v for k, v in data.items() if v is not None}

    try:
        config = ProxyCacheConfig(**data)
        existing = model.organization.get_organization(orgname)
        config.organization = existing

        proxy = Proxy(config, "something-totally-fake", True)
        response = proxy.get(f"{proxy.base_url}/v2/")
        if response.status_code == 200:
            return "Valid", 202
    except UpstreamRegistryError as e:
        raise request_error(
            message="Failed login to remote registry. Please verify entered details and try again."
        )
    raise request_error(message="Failed to validate Proxy cache configuration")
def get_proxy(self, url):
    """
    Get the list of proxies from the url using phantomjs.
    :param driver: phantomjs driver
    :param url: url link of the page
    :return: a list containing the proxies
    """
    self.driver.get(url)
    sleep(2)
    if 'HTTP' not in self.driver.title:
        return []
    else:
        tbody = self.driver.find_element_by_tag_name('tbody')
        content = tbody.text.split('\n')
        proxies = []
        for line in content:
            tt = line.split()
            tmp = tt[0:4]
            tmp.append(''.join(tt[4:7]))
            proxies.append(tmp)
        for proxy in proxies:
            tmp = Proxy()
            tmp.set_value(
                ip=proxy[0],
                port=proxy[1],
                country=proxy[4],
                anonymity=proxy[2],
                source='kuaidaili',
            )
            self.add_proxy(tmp)
def GET(self):
    try:
        sql = SqlHelper()
        inputs = web.input()
        name = inputs.get('name')

        proxy = Proxy()
        proxy.set_value(
            ip=inputs.get('ip'),
            port=inputs.get('port'),
            country=inputs.get('country', None),
            anonymity=inputs.get('anonymity', None),
            https=inputs.get('https', 'no'),
            speed=inputs.get('speed', -1),
            source=inputs.get('source', name),
        )

        utils.sql_insert_proxy(sql, name, proxy)

        command = "SELECT ip FROM {0} WHERE ip={1} AND port={2}".format(
            name, inputs.get('ip'), inputs.get('port'))
        res = sql.query_one(command)
        return res is None
    except:
        pass

    return False
def parse_page(self, response):
    next_url = response.css('a[rel="next"]::attr(href)').extract_first()
    if next_url:
        next_url = self.base_url + next_url
        yield scrapy.Request(url=next_url, callback=self.parse_page)

    sel = Selector(text=response.body)
    infos = sel.xpath('//tr[@class="odd"]').extract()
    for info in infos:
        val = Selector(text=info)
        ip = val.xpath('//td[2]/text()').extract_first()
        port = val.xpath('//td[3]/text()').extract_first()
        country = val.xpath('//td[4]/a/text()').extract_first()
        anonymity = val.xpath('//td[5]/text()').extract_first()

        proxy = Proxy()
        proxy.set_value(
            ip=ip,
            port=port,
            country=country,
            anonymity=anonymity,
            source=self.name,
        )
        self.add_proxy(proxy=proxy)
def craw(self, root_url, sess):
    companys = []
    pro = Proxy('proxy.txt')
    html_content = self.downloader.download(root_url,
                                            retry_count=3,
                                            headers=requests_headers(),
                                            proxy=pro.get_proxy(),
                                            sess=sess,
                                            cookie=self.cookie)
    types, infos = self.parser.get_infos(root_url, html_content)
    for i in range(len(infos)):
        url = ("http://icid.iachina.cn/front/getCompanyInfos.do?columnid=" + infos[i][0] +
               "&informationno=" + infos[i][1] + "&attr=" + infos[i][2])
        inside_html_content = self.downloader.download(
            url,
            retry_count=3,
            headers=requests_headers(),
            proxy=pro.get_proxy(),
            sess=sess)
        company = self.parser.get_data(url, inside_html_content)
        company['叶子结点栏目名称'] = types[i]  # key means "leaf-node column name"
        print(company)
        companys.append(company)
    return companys
def parse_page(self, response):
    logging.info("parse_page :%s" % response.url)
    next_url = self.parse_next_url(response)
    if next_url:
        yield scrapy.Request(url=next_url, callback=self.parse_page)

    trs = response.css('#main table tr')
    for tr in trs:
        tds = tr.css("td")
        if len(tds) != 10:
            continue
        ip = tds[0].xpath('script/text()').extract_first()
        port = tds[1].xpath('text()').extract_first()
        country = tds[3].xpath('text()').extract_first()
        anonymity = tds[5].xpath('text()').extract_first()
        try:
            ip = re.search(r"str_rot13\(\"(\S+)\"", ip).group(1)
            ip = self.decode_ip(ip)
        except Exception as e:
            continue
        proxy = Proxy()
        proxy.set_value(
            ip=ip,
            port=port,
            country=country,
            anonymity=anonymity,
            source=self.name,
        )
        self.add_proxy(proxy)
def request_download(self, url, method='get', encode='utf-8',
                     redirect_check=False, error_check=False, data=None):
    for i in range(self.RETRY):
        proxies = self.pick_cookie_agent_proxy(url)
        try:
            if method == 'post':
                response = self.driver.post(url, timeout=self.TIMEOUT, data=data)
            else:
                response = self.driver.get(url, timeout=self.TIMEOUT)
            if response.status_code == 200:
                if redirect_check and response.url != url:
                    continue
                if error_check:
                    if __import__('fetch.error_checker.{}'.format(self.batch_key),
                                  fromlist=['error_checker']).error_checker(response):
                        continue
                response.encoding = encode
                return response.text  # text is unicode
        except:  # requests.exceptions.ProxyError, requests.ConnectionError, requests.ConnectTimeout
            proxy = proxies.items()[0][1]
            Proxy.instance().post(url, proxy)
            print('requests failed: ', sys.exc_info()[0])
            log_traceback()
        finally:
            time.sleep(self._get_sleep_period())
    else:
        return u''
def test_authenticated_proxy_http_get(self):
    self.proxy = Proxy(Client(self._conn, self._addr),
                       b'Basic %s' % base64.b64encode(b'user:pass'))
    self.proxy.client.conn.queue(
        (b'GET http://localhost:%d HTTP/1.1' % self.http_server_port) + CRLF)
    self.proxy._process_request(self.proxy.client.recv())
    self.assertNotEqual(self.proxy.request.state, HttpParser.states.COMPLETE)

    self.proxy.client.conn.queue(CRLF.join([
        b'User-Agent: proxy.py/%s' % version,
        b'Host: localhost:%d' % self.http_server_port,
        b'Accept: */*',
        b'Proxy-Connection: Keep-Alive',
        b'Proxy-Authorization: Basic dXNlcjpwYXNz',
        CRLF
    ]))
    self.proxy._process_request(self.proxy.client.recv())
    self.assertEqual(self.proxy.request.state, HttpParser.states.COMPLETE)
    self.assertEqual(self.proxy.server.addr, (b'localhost', self.http_server_port))

    self.proxy.server.flush()
    self.assertEqual(self.proxy.server.buffer_size(), 0)

    data = self.proxy.server.recv()
    while data:
        self.proxy._process_response(data)
        if self.proxy.response.state == HttpParser.states.COMPLETE:
            break
        data = self.proxy.server.recv()

    self.assertEqual(self.proxy.response.state, HttpParser.states.COMPLETE)
    self.assertEqual(int(self.proxy.response.code), 200)
def parse_page(self, response):
    self.write(response.body)

    sel = Selector(response)
    infos = sel.xpath('//ul[@class="l2"]').extract()
    for i, info in enumerate(infos):
        val = Selector(text=info)
        ip = val.xpath('//ul[@class="l2"]/span[1]/li/text()').extract_first()
        port = val.xpath('//ul[@class="l2"]/span[2]/li/text()').extract_first()
        anonymity = val.xpath('//ul[@class="l2"]/span[3]/li/text()').extract_first()
        https = val.xpath('//ul[@class="l2"]/span[4]/li/text()').extract_first()
        country = val.xpath('//ul[@class="l2"]/span[5]/li/a/text()').extract_first()

        proxy = Proxy()
        proxy.set_value(
            ip=ip,
            port=port,
            country=country,
            anonymity=anonymity,
            source=self.name,
        )
        self.add_proxy(proxy=proxy)
def start_requests(self):
    count = self.sql.get_proxy_count(config.free_ipproxy_table)
    count_free = self.sql.get_proxy_count(config.httpbin_table)
    # ids = self.sql.get_proxy_ids(config.free_ipproxy_table)
    # ids_httpbin = self.sql.get_proxy_ids(config.httpbin_table)

    logging.info('init data...')
    for data in self.sql.db[config.free_ipproxy_table].find(self.query):
        url = random.choice(self.urls)
        cur_time = time.time()

        proxy = Proxy()
        proxy.set_value(ip=data.get('ip'),
                        port=data.get('port'),
                        country=data.get('country'),
                        anonymity=data.get('anonymity'),
                        https=data.get('https'),
                        speed=data.get('speed'),
                        source=data.get('source'),
                        vali_count=data.get('vali_count'),
                        err_count=data.get('err_count'))
        proxy.id = data.get('_id')

        args = (cur_time, data, 'http://%s:%s' % (proxy.ip, proxy.port))
        j = SimplePool.ThreadJob(self.valid, args)
        self.threadpool.add_job(j)

    result = ValidThread(self.threadpool)
    result.start()

    self.threadpool.start()
    self.threadpool.finish()
def test_manifest_exists_404(self):
    with HTTMock(docker_registry_mock):
        proxy = Proxy(self.config, "library/postgres")
        with pytest.raises(UpstreamRegistryError) as excinfo:
            proxy.manifest_exists(image_ref=TAG_404)
        self.assertIn("404", str(excinfo.value))
def test_get_blob(self):
    with HTTMock(docker_registry_mock):
        proxy = Proxy(self.config, "library/postgres")
        try:
            proxy.get_blob(digest=DIGEST)
        except UpstreamRegistryError as e:
            pytest.fail(f"unexpected UpstreamRegistryError {e}")
def test_dir():
    proxy = Proxy()
    assert dir(proxy) == []
    Proxy.set_value(proxy, INT_TEST_VALUE)
    assert dir(proxy) == dir(INT_TEST_VALUE)
class Scrapper:
    def __init__(self):
        self.proxy = Proxy()
        self.session = requests.session()
        self.ua = UserAgent()

    def generate_headers(self):
        return {
            'User-Agent': self.ua.random,
            "Accept-Language": "en-US, en;q=0.5"
        }

    def get_page(self, page_url, encoding='cp1251'):
        headers = self.generate_headers()
        proxy = self.proxy.get_proxy()
        res = self.session.get(page_url, headers=headers, proxies=proxy)
        res.encoding = encoding
        return res

    def get_page_post(self, page_url, data, encoding='cp1251'):
        headers = self.generate_headers()
        proxy = self.proxy.get_proxy()
        res = self.session.post(page_url, headers=headers, proxies=proxy, data=data)
        res.encoding = encoding
        return res

    def pars_page(self, res, param="html.parser"):
        parsed_page = BeautifulSoup(res.text, param)
        return parsed_page

    def get_list(self, soup_html, html_elem, attrs={}):
        return soup_html.find_all(html_elem, attrs)
def test_get_blob_404(self):
    with HTTMock(docker_registry_mock):
        proxy = Proxy(self.config, "library/postgres")
        with pytest.raises(UpstreamRegistryError) as excinfo:
            proxy.get_blob(digest=DIGEST_404)
        self.assertIn("404", str(excinfo.value))
def parse_page(self, response):
    html = Selector(response)
    ip_list = [
        html.xpath(f"/html/body/div[1]/div[2]/div[1]/div[1]/table/tbody/tr[{i}]/td[1]/text()").extract_first()
        for i in range(1, 16)]
    port_list = [
        html.xpath(f"/html/body/div[1]/div[2]/div[1]/div[1]/table/tbody/tr[{i}]/td[2]/text()").extract_first()
        for i in range(1, 16)]
    country_list = [
        html.xpath(f"/html/body/div[1]/div[2]/div[1]/div[1]/table/tbody/tr[{i}]/td[5]/text()").extract_first()
        for i in range(1, 16)]
    anonymity_list = [
        html.xpath(f"/html/body/div[1]/div[2]/div[1]/div[1]/table/tbody/tr[{i}]/td[3]/text()").extract_first()
        for i in range(1, 16)]

    for new_ip, new_port, new_country, new_anonymity in zip(ip_list, port_list, country_list, anonymity_list):
        proxy = Proxy()
        if new_ip is not None:
            proxy.set_value(
                ip=new_ip,
                port=new_port,
                country=new_country,
                anonymity=new_anonymity,
                source=self.name
            )
            self.add_proxy(proxy)
def parse_page(self, response):
    utils.log(dir(response))
    utils.log('body type:%s' % type(response.body))
    utils.log('body_as_unicode type:%s' % type(response.body_as_unicode))
    self.write(response.body)

    sel = Selector(response)
    infos = sel.xpath('//tbody/tr').extract()
    for i, info in enumerate(infos):
        if i == 0:
            continue

        val = Selector(text=info)
        ip = val.xpath('//td[1]/text()').extract_first()
        port = val.xpath('//td[2]/text()').extract_first()
        country = val.xpath('//td[6]/text()').extract_first()
        anonymity = val.xpath('//td[3]/text()').extract_first()
        https = val.xpath('//td[4]/text()').extract_first()

        proxy = Proxy()
        proxy.set_value(
            ip=ip,
            port=port,
            country=country,
            anonymity=anonymity,
            source=self.name,
        )
        self.add_proxy(proxy=proxy)
def crawl_list_by_no_batch(page_no, infos, urls):
    try:
        crawl_url = crawl_url_template.format(page_no)
        session = get_crawl_session()
        result = session.get(crawl_url, timeout=5).text
        Proxy.close_session(session)
        soup = bs(result, 'lxml')
        items = soup.find_all('div', class_='item cl')
        for item in items:
            update_time = item.find('span')
            if update_time:
                update_time = update_time.text
            if item.find('a'):
                title = item.find('a').text
                url = item.find('a')['href']
                if url not in dup_set and url not in urls:
                    dup_set.add(url)
                    ddict = {
                        'title': title,
                        'update_time': update_time,
                        'url': url
                    }
                    infos.append(ddict)
                    basic_publish_msg(list_queue, json.dumps(ddict))
    except Exception, e:
        print e
        crawl_list_by_no(page_no)
def login(self, login_url, return_url, sess):
    pro = Proxy('proxy.txt')
    username = '******'
    password = '******'
    formash = '58FF9E339A'
    backurl = 'https%253A%252F%252Fdb.yaozh.com%252Fhmap'
    logindata = {
        "username": username,
        "pwd": password,
        "formhash": formash,
        "backurl": backurl
    }
    header = requests_headers()
    login = sess.post(login_url, data=logindata, headers=header, verify=False)
    cookie = login.cookies
    header["Referer"] = "http://icid.iachina.cn/?columnid_url=201509301401"
    response = sess.get(return_url, headers=header,
                        proxies={"https": pro.get_proxy()},
                        verify=False, cookies=cookie)
    return response.text
def send_result(self, response):
    proxy = Proxy()
    proxy.update_result(
        response['submissionId'],
        response['status'],
        response['run_time'],
        response['run_memory'],
        response['extra_message'],
        '', '', '',
        time.strftime('%Y-%m-%d %X', time.localtime(time.time()))
    )
def test_conversions():
    proxy = Proxy(STR_TEST_VALUE)
    assert int(proxy) == int(STR_TEST_VALUE)
    assert float(proxy) == float(STR_TEST_VALUE)
    assert complex(proxy) == complex(STR_TEST_VALUE)

    Proxy.set_value(proxy, INT_TEST_VALUE)
    assert hex(proxy) == hex(INT_TEST_VALUE)
def remote_radmin_proxy(self, service_uuid, command, params):
    if self.services.has_option(service_uuid, 'radmin_pw'):
        host = self.services.get(service_uuid, 'radmin_host')
        params['password'] = self.services.get(service_uuid, 'radmin_pw')
    else:
        defer.returnValue('nocredentials')

    # modify the Regions.ini. We could do this later properly in RemoteController.cs
    if command == 'admin_create_region' or command == 'admin_restart' or command == 'admin_shutdown':
        if params.has_key('region_id'):
            params['regionID'] = params['region_id']
        params['listen_ip'] = '0.0.0.0'
        params['listen_port'] = self.get_free_port()
        params['external_address'] = self.config.get('client', 'wan_ip')
        testresponse = self.update_region_ini(service_uuid, params, 'add')
    elif command == 'admin_delete_region':
        self.update_region_ini(service_uuid, params, 'del')

    proxy = Proxy(host)
    try:
        response = yield proxy.callRemote(command, params)
        if 'success' in response:
            if response['success']:
                defer.returnValue(response)
        if 'error' in response:
            response = {'status': {'code': 'RADMIN_FAILED', 'feedback': response['error']}}
            if command == 'admin_create_region':
                self.update_region_ini(service_uuid, params, 'del')
    except Error:
        if command == 'admin_create_region':
            self.update_region_ini(service_uuid, params, 'del')
        response = {'status': {'code': 'RADMIN_FAILED'}}

    defer.returnValue(response)
def test_operations():
    print "Objects".center(80, '=')
    ty = ('Array', [('t', 'int')])
    p = Proxy(ty)
    print p

    print "Tests".center(80, '=')
    assert p.typeof == ty

    b = p.add(2.0)
    c = b.add(2.0)
    d = c.add(2.0)
    print d
    print b.sqrt()

    print p > p
    print (p + p) + p
    print reduce(add, [p, p, p] * 50)

    f = p > p
    f = p - p
    print f
def parse_page(self, response):
    logging.info("parse_page :%s" % response.url)
    next_url = self.parse_next_url(response)
    if next_url:
        yield scrapy.Request(url=next_url, callback=self.parse_page)

    trs = response.css('#footer table tr')
    for tr in trs:
        tds = tr.css("td")
        if len(tds) != 5:
            continue
        ip = tds[0].xpath('text()').extract_first()
        port = tds[1].xpath('text()').extract_first()
        country = tds[2].xpath('text()').extract_first()
        anonymity = tds[3].xpath('text()').extract_first()

        proxy = Proxy()
        proxy.set_value(
            ip=ip,
            port=port,
            country=country,
            anonymity=anonymity,
            source=self.name,
        )
        self.add_proxy(proxy)
def api_proxy():
    user = request.args['user'] if 'user' in request.args else None
    pkey = os.path.join(cfg.get("ipyno", "sshdir"), cfg.get('nova-admin', "vm_key") + '.pem')
    pcfg = dict(cfg.items('proxy'))
    try_ipydb = get_ipydb(request, False)
    if try_ipydb['status'] != 200:
        return return_json(None, try_ipydb['error'], try_ipydb['status'])
    ipydb = try_ipydb['data']
    proxy = Proxy(pcfg, pkey)
    response = return_json(None, 'Method Not Allowed (%s): %s' % (request.method, request.url), 405)

    # return list of server objs from proxy (nova-admin auth)
    if request.method == 'GET':
        try_nova = get_nova(request, 'nova-admin')
        if try_nova['status'] != 200:
            response = return_json(None, try_nova['error'], try_nova['status'])
        elif not user:
            response = return_json(None, "Bad Request: missing user", 400)
        else:
            data = proxy.get_server(user)
            response = return_json(data['data']) if data['status'] == 200 else return_json(None, data['error'], data['status'])

    # return server obj from proxy for user - if it does not exist, create it (add user to proxy and db)
    # NOTE: this is the only API call that uses OAuth and is accessible by a normal user and not an admin
    elif request.method == 'POST':
        auth = check_auth(request.headers)
        if auth['error']:
            return return_json(None, auth['error'], 401)
        res = ipydb.get('user', auth['username'])
        # user already has a proxy
        if res:
            data = proxy.get_server(auth['username'])
            return return_json(data['data']) if data['status'] == 200 else return_json(None, data['error'], data['status'])
        # set proxy for user - get free port, add to db, add to nginx config
        vm = ipydb.reserve()
        port = ipydb.next_port(int(pcfg["pstart"]), int(pcfg["pend"]))
        if vm and port:
            vm = ipydb.update(vm['id'], user=auth['username'], port=port)
            data = proxy.add_server(auth['username'], vm['ip'], port)
            response = return_json(data['data']) if data['status'] == 200 else return_json(None, data['error'], data['status'])
        else:
            response = return_json(None, 'Service Unavailable: no free ipython servers available', 503)

    # delete vm from proxy based on user, remove proxy/user info from db (nova auth)
    elif request.method == 'DELETE':
        try_nova = get_nova(request, 'nova-admin')
        if try_nova['status'] != 200:
            response = return_json(None, try_nova['error'], try_nova['status'])
        elif not user:
            response = return_json(None, "Bad Request: missing user", 400)
        else:
            vm = ipydb.get('user', user)
            # user in DB and valid auth - now we delete
            if vm:
                ipydb.drop_user(vm['id'])
                res = proxy.remove_server(user)
                # TODO: stop ipython on vm
                response = return_json("user '%s' removed" % user) if res['status'] == 200 else return_json(None, res['error'], res['status'])
            else:
                response = return_json(None, "Bad Request: invalid user %s" % user, 400)

    ipydb.exit()
    return response
def OnStop(self, event):
    self.toolbar.EnableTool(self.toolStart.GetId(), 1)
    self.toolbar.EnableTool(self.toolStop.GetId(), 0)

    menu = self.GetMenuBar().GetMenu(1)
    menu.FindItemByPosition(0).Enable(True)
    menu.FindItemByPosition(1).Enable(False)

    Proxy.end_catch()
def test_get_manifest(self):
    with HTTMock(docker_registry_mock):
        proxy = Proxy(self.config, "library/postgres")
        raw_manifest, _ = proxy.get_manifest(image_ref=TAG)
        manifest = json.loads(raw_manifest)
        self.assertEqual(list(manifest.keys()),
                         ["schemaVersion", "mediaType", "config", "layers"])
def test_attrs():
    class Test:
        pass

    proxy = Proxy(Test())
    proxy.x = INT_TEST_VALUE
    assert proxy.x == INT_TEST_VALUE
    del proxy.x
def getRequestContent(url, proxy=False):
    if proxy:
        http = Proxy().getProxyHttp(headers)
    else:
        http = getHttp()
    print(getUrl(url))
    r = http.request('GET', getUrl(url))
    return r.data.decode("utf-8")
def test_dict_proxy(self):
    d = {"key1": "val1", "key2": "val2"}
    p = Proxy(d)
    self.assertEqual(p["key1"], "val1")
    self.assertEqual(len(p.keys()), len(d.keys()))
    self.assertEqual(p, d)
    self.assertTrue(p == d)
    self.assertNotEqual(id(p), id(d))
    del p["key1"]
def spot_proxy(self, ip, port, country):
    if country:
        country = country.decode('utf-8')
    p = Proxy(ip=ip, port=port, country=country)
    if self.proxy_pool.has_proxy(p.proxy_url()):
        logger.debug_class_fun(ProxyPoolHandler.__name__,
                               "proxy exists, proxy_url = %s", p.proxy_url())
    else:
        logger.debug_class_fun(ProxyPoolHandler.__name__,
                               "new proxy, proxy_url = %s", p.proxy_url())
        self.proxy_pool.add_proxy(p)
def online(self):
    config.logger.info("尝试上线。。。。")  # "attempting to go online..."
    try:
        proxy = Proxy()
        response = proxy.online()
        self.is_online = True
        config.logger.info("上线成功,节点ID:%s" % response['siteId'])  # "online, node ID: %s"
    except Exception, msg:
        config.logger.error("上线失败,错误信息:%s" % msg)  # "failed to go online, error: %s"
def test_proxy(self):
    proxy = Proxy()
    self.addCleanup(proxy.terminate)
    proxy.start()

    oldenv = os.environ.copy()
    self.addCleanup(lambda: setattr(os, 'environ', oldenv))
    os.environ['https_proxy'] = proxy.address

    self.assertRaises(VirtError, self.run_once)
    self.assertIsNotNone(proxy.last_path, "Proxy was not called")
    self.assertEqual(proxy.last_path, 'localhost:8443')
def add_proxy(self, pt):
    print "selected lo:", self.get_selected_layer_object()
    p = Proxy(self.get_selected_layer_object(), position=pt)
    p.name = str(self.last_id)
    p.id = self.last_id
    print "new proxy id:", p.id
    self.proxy_dct[self.last_id] = p
    if self.layer_type == LayerType.meta:
        print "adding meta"
        self.adjacency_dct[self.last_id] = []
    self.resort_all_proxys()
    self.last_id += 1
def splice(self, start, stop, data):
    which = self.which
    if which in ('string', 'buffer'):
        immutable = self.struct[self.index]
        removed = immutable[start:stop]
        self.struct[self.index] = immutable[:start] + data + immutable[stop:]
    elif which == 'list':
        removed = self.struct[self.index][start:stop]
        self.struct[self.index][start:stop] = data
    else:
        raise Exception("Cannot splice %s" % which)
    Proxy.reproxy(self.struct.proxy, self.index, self.struct[self.index])
    return removed
def get_submission(self):
    response = None
    is_valid = False
    while not is_valid:
        try:
            proxy = Proxy()
            response = proxy.get_submission()
            # config.logger.debug(response)
        except Exception, e:
            config.logger.error(e)
            self.is_online = False
            self.on_load()
        if response['valid'] == 'true':
            is_valid = True
def main(self):
    proxy = Proxy()
    task = TaskPool()
    try:
        proxy.load_proxy(config.proxyfile)
        proxy.load_agent(config.agentfile)
        task.load_task(config.taskfile)
        pid = 0
        while pid < config.pidnum:
            cr = Crawle(proxy, task)
            cr.start()
            pid += 1
    except Exception as e:
        info = traceback.print_exc()
        logger.error(format(e))
        raise e
def query_proxies():
    """
    Query the stored proxies that are usable.
    :return: set
    """
    query = Proxy.select().where(~(Proxy.status_code >> None))
    return set([proxy.proxy for proxy in query])
def save(self, proxy_list, proxy_type='socks5', file_name="data/proxies.db"):
    """Save a list of proxies to the file."""
    with open(file_name, 'a') as f:
        for proxy in proxy_list:
            f.write(proxy.get_data() + '\n')
def __init__(self, tcp_stack):
    # stack instance
    self.tcp_stack = tcp_stack
    pair = tcp_stack.get_connection_pair()
    # init destination and source
    self.src_ip, self.src_port = pair[0], pair[1]
    self.dst_ip, self.dst_port = pair[2], pair[3]
    # init proxy socket
    self.sock = Proxy.get_connection(pair[2:])
    # init data IO queue
    self.queue = Queue(DATA_QUEUE_LENGTH)
    # init outcome_seq
    self.acked_outcome_seq = self.outcome_seq = abs(id(self)) & 0xffffffff  # generate outcome seq according to id(self)
    self.income_seq = inc_with_mod(tcp_stack.seq_num, 0xffffffff)  # remote sequence, add 1 to indicate we've received
    self.last_ack_send = 0
    self.last_ack_count = 0
    self.running = 1
    self.ahead = -1  # ahead mode disabled
    self.resend_buf = deque()
    self.fin = False
    # debug only
    self.outcome_seq_base = self.outcome_seq
    self.income_seq_base = self.income_seq
def OnRecord(self, event):
    self.toolbar.EnableTool(self.toolStart.GetId(), 0)
    self.toolbar.EnableTool(self.toolStop.GetId(), 1)

    menu = self.GetMenuBar().GetMenu(1)
    menu.FindItemByPosition(0).Enable(False)
    menu.FindItemByPosition(1).Enable(True)

    record = Record.Record()
    self.nb.recordTab.tree.AppendNewRecord(record)

    if self.proxy == None:
        self.proxy = Proxy.thread_start()

    Proxy.begin_catch(
        callback=self.nb.recordTab.tree.AppendNewHit,
        filter=Proxy.DefaultContentFilter(),
    )
def get_proxies(page_url):
    print page_url
    raw_page = get_raw_page(page_url)
    proxy_list = []
    for tr in raw_page.find('table', id='ip_list').find_all('tr')[1:]:
        td = tr.find_all('td')
        p = Proxy()
        p.proxy_country = td[1].img.get('alt').upper()
        p.proxy_ip = td[2].string
        p.proxy_port = td[3].string
        p.proxy_location = td[4].text.replace('\n', '').replace(' ', '')
        p.proxy_type = td[5].string
        p.connection_type = td[6].string.replace(u'代理', '').upper()  # strip the word "proxy"
        p.validation_delay = td[7].find('div', {'class': 'bar'})['title'].replace(u'秒', '')  # strip "seconds"
        p.connection_delay = td[8].find('div', {'class': 'bar'})['title'].replace(u'秒', '')  # strip "seconds"
        p.validate_time = datetime.datetime.strptime('20' + td[9].string, '%Y-%m-%d %H:%M')
        proxy_list.append(p)
    return proxy_list
def test_proxy_authentication_failed(self):
    self.proxy = Proxy(Client(self._conn, self._addr),
                       b'Basic %s' % base64.b64encode(b'user:pass'))
    with self.assertRaises(ProxyAuthenticationFailed):
        self.proxy._process_request(CRLF.join([
            b'GET http://abhinavsingh.com HTTP/1.1',
            b'Host: abhinavsingh.com',
            CRLF
        ]))
def OnClose(self, event):
    # TODO: Stop playing before exiting
    Record.CANCELLED = True  # not enough
    try:
        if self.proxy:
            Proxy.stop()
            self.proxy.join()
            self.proxy = None

            import proxy.Agent as poster
            poster.kill_if()

        self.TryAutoSave()
    except Exception, e:
        import Logger
        log = Logger.getLogger()
        log.exception('Ignoring Exception when closing application:\n%s', e)
def __init__(self):
    # Create the communications thread.
    self.proxy = Proxy(IPConnection("10.42.0.1", 50001))
    self.x = 0.0
    self.y = 0.0
    self.heading = 1.57
    self.offset = 0.0  # Offset to record when the odometry has been forced.
    Thread.__init__(self)
def run(self):
    """
    Creates the test suite and runs the tests.
    """
    import testrunner
    import testloader

    self._proxy = Proxy("{0}:{1}".format(self.config.ADDRESS, self.config.PROXY_SERVER_PORT))
    self._test_runner = testrunner.TestRunner(verbosity=2, output=swt.config.XML_FILE_DIR)
    self._test_loader = testloader.TestLoader()
    self._test_suite = self._test_loader.get_test_suite()
    self._test_runner.run(self._test_suite)
    self.end()
def spider_page(page_url, parse):
    headers = {'user-agent': utility.random_ua()}
    try:
        r = requests.get(page_url, headers=headers, timeout=30)
    except:
        logger.traceback()
        return
    if r.status_code != 200:
        logger.error("get list page failed, url = %s, status_code = %d", page_url, r.status_code)
        return

    client = ProxyPoolClient()
    try:
        client.open()
        logger.debug_fun("connect ok")
    except Thrift.TException:
        logger.traceback()
        logger.debug_fun("connect failed, quit")
        return

    logger.debug_fun("get list page ok, url = %s", page_url)
    candidate_proxies = parse(r.content)
    for ip, port in candidate_proxies:
        logger.debug_fun("check proxy, ip = %s, port = %d", ip, port)
        proxy_url = Proxy.make_proxy_url(ip, port)
        ret, resp_second = validate_proxy(proxy_url)
        if not ret or resp_second > settings.PROXY_MAX_DELAY:
            logger.debug_fun("check proxy failed, proxy_url = %s", proxy_url)
            continue
        logger.debug_fun("check proxy ok, proxy_url = %s", proxy_url)

        try:
            proxy_exists = client.has_proxy(proxy_url)
        except:
            logger.traceback()
            logger.debug_fun("check proxy exists failed, proxy_url = %s", proxy_url)
            break

        if proxy_exists:
            logger.debug_fun("proxy exists, proxy_url = %s", proxy_url)
        else:
            try:
                country = utility.get_ip_country(ip)
            except:
                logger.traceback()
                logger.debug_fun("get country failed, proxy_url = %s", proxy_url)
                break
            try:
                client.spot_proxy(ip, port, country)
            except:
                logger.traceback()
                logger.debug_fun("spot new proxy failed, ip = %s, port = %d, country = %s", ip, port, country)
                break
            logger.debug_fun("spot new proxy, ip = %s, port = %d, country = %s", ip, port, country)
class TestProxy(unittest.TestCase):
    """
    Unit test for Proxy
    """

    def setUp(self):
        conf = ConfMock()
        self.proxy = Proxy(conf)

    def tearDown(self):
        urllib.urlcleanup()

    def testFTP(self):
        downloader = self.proxy.get_ftp_downloader()
        downloader.retrieve(REPO_URL, REPO_FILE)
        self.assertEqual(os.path.isfile(REPO_FILE), True)
def connect(self):
    """
    Connect to the server.
    """
    if self.proxy is None:
        self.proxy = Proxy(self._path1)
        self.handle = self.proxy.x_request(Create)

        # Bind the API to the queue.
        self.proxy.x_request(Register, self.handle, ptr1=self._queue.handle)
        self._queue.register(self._x_response)

        # Start the queue loop.
        # self._queue.start_pulling()

    self.proxy.x_request(Connect, self.handle,
                         ptr1=byref(self.server),
                         ptr2=byref(self.user),
                         ptr3=self._local_path)
def test_authenticated_proxy_http_tunnel(self):
    self.proxy = Proxy(Client(self._conn, self._addr),
                       b'Basic %s' % base64.b64encode(b'user:pass'))
    self.proxy.client.conn.queue(CRLF.join([
        b'CONNECT localhost:%d HTTP/1.1' % self.http_server_port,
        b'Host: localhost:%d' % self.http_server_port,
        b'User-Agent: proxy.py/%s' % version,
        b'Proxy-Connection: Keep-Alive',
        b'Proxy-Authorization: Basic dXNlcjpwYXNz',
        CRLF
    ]))
    self.proxy._process_request(self.proxy.client.recv())
    self.assertFalse(self.proxy.server is None)
    self.assertEqual(self.proxy.client.buffer, PROXY_TUNNEL_ESTABLISHED_RESPONSE_PKT)

    parser = HttpParser(HttpParser.types.RESPONSE_PARSER)
    parser.parse(self.proxy.client.buffer)
    self.assertEqual(parser.state, HttpParser.states.HEADERS_COMPLETE)
    self.assertEqual(int(parser.code), 200)
    self.proxy.client.flush()
    self.assertEqual(self.proxy.client.buffer_size(), 0)

    self.proxy.client.conn.queue(CRLF.join([
        b'GET / HTTP/1.1',
        b'Host: localhost:%d' % self.http_server_port,
        b'User-Agent: proxy.py/%s' % version,
        CRLF
    ]))
    self.proxy._process_request(self.proxy.client.recv())
    self.proxy.server.flush()
    self.assertEqual(self.proxy.server.buffer_size(), 0)

    parser = HttpParser(HttpParser.types.RESPONSE_PARSER)
    data = self.proxy.server.recv()
    while data:
        parser.parse(data)
        if parser.state == HttpParser.states.COMPLETE:
            break
        data = self.proxy.server.recv()

    self.assertEqual(parser.state, HttpParser.states.COMPLETE)
    self.assertEqual(int(parser.code), 200)
def __init__(self, args):
    Proxy.__init__(self, args)
    self.server_ip = args.server_ip
    self.server_port = args.server_port
    self.dst = (self.server_ip, self.server_port)
def get_api_name(self):
    if self.proxy is None:
        self.proxy = Proxy(self._path1)
    ptr = self.proxy.x_request(GetApiName)
    return c_char_p(ptr).value