async def get_page(url, proxy=None, proxy_auth=None, binary=False, verify=False, timeout=300):
    """Get data of the page (file binary or response text)."""
    urllib3.disable_warnings()
    proxies = None
    auth = None
    if proxy and proxy_auth:
        proxies = {"http": proxy, "https": proxy}
        auth = HTTPProxyAuth(proxy_auth['username'], proxy_auth['password'])
    retry = 3  # retry up to 3 times
    while retry > 0:
        try:
            with requests.Session() as session:
                session.proxies = proxies  # fixed: the Session attribute is `proxies`, not `proxy`
                session.auth = auth
                response = session.get(url, verify=verify, timeout=timeout)
                if binary:
                    return response.content
                return response.text
        except requests.exceptions.ConnectionError:
            retry -= 1
    return None  # all retries exhausted
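# Usage sketch for get_page above (all values are placeholders, not from the
# source). Note that although the function is declared `async`, it performs
# blocking `requests` calls, so it will block the event loop while it runs.
import asyncio

html = asyncio.run(get_page(
    "https://example.com",
    proxy="http://proxy.example.com:8080",
    proxy_auth={"username": "user", "password": "secret"},
))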
def get_featureOfInterest(query_uri_base, aws_urn=None):
    # Assemble the SOS query string for one station or for all stations
    q = None
    if aws_urn is not None:
        q = furl(query_uri_base + '/service').add({
            'service': 'SOS',
            'version': '2.0.0',
            'request': 'GetFeatureOfInterest',
            'featureOfInterest': aws_urn
        }).url
    else:
        q = furl(query_uri_base + '/sos/kvp').add({
            'service': 'SOS',
            'version': '2.0.0',
            'request': 'GetFeatureOfInterest',
        }).url

    # Run the query request
    creds = json.load(open('creds.json'))
    auth = HTTPProxyAuth(creds['username'], creds['password'])
    ga_proxy = {"http": creds['proxy']}
    headers = {'accept': 'application/json'}
    r = requests.get(q, headers=headers, proxies=ga_proxy, auth=auth)
    results = json.loads(r.text)

    # Return one station or all of them
    if aws_urn is not None:
        return results['featureOfInterest'][0]
    else:
        # return sorted(results['featureOfInterest'], key=lambda k: k['name'])
        return sorted(results['featureOfInterest'])
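# get_featureOfInterest above reads its proxy credentials from creds.json.
# A minimal sketch of the expected file shape, inferred from the keys the
# code accesses; all values are placeholders.
import json

creds = {
    "username": "proxy-user",
    "password": "proxy-pass",
    "proxy": "http://proxy.example.com:8080",
}
with open('creds.json', 'w') as f:
    json.dump(creds, f, indent=2)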
def test_chatwork_proxy():
    rule = {
        'name': 'Test Chatwork Rule',
        'type': 'any',
        'chatwork_apikey': 'xxxx1',
        'chatwork_room_id': 'xxxx2',
        'chatwork_proxy': 'http://proxy.url',
        'chatwork_proxy_login': '******',
        'chatwork_proxy_pass': '******',
        'alert': []
    }
    rules_loader = FileRulesLoader({})
    rules_loader.load_modules(rule)
    alert = ChatworkAlerter(rule)
    match = {'@timestamp': '2021-01-01T00:00:00', 'somefield': 'foobarbaz'}
    with mock.patch('requests.post') as mock_post_request:
        alert.alert([match])

    expected_data = {
        'body': 'Test Chatwork Rule\n\n@timestamp: 2021-01-01T00:00:00\nsomefield: foobarbaz\n',
    }

    mock_post_request.assert_called_once_with(
        'https://api.chatwork.com/v2/rooms/xxxx2/messages',
        params=mock.ANY,
        headers={'X-ChatWorkToken': 'xxxx1'},
        proxies={'https': 'http://proxy.url'},
        auth=HTTPProxyAuth('admin', 'password'))

    actual_data = mock_post_request.call_args_list[0][1]['params']
    assert expected_data == actual_data
def test_telegram_proxy():
    rule = {
        'name': 'Test Telegram Rule',
        'type': 'any',
        'telegram_bot_token': 'xxxxx1',
        'telegram_room_id': 'xxxxx2',
        'telegram_proxy': 'http://proxy.url',
        'telegram_proxy_login': '******',
        'telegram_proxy_pass': '******',
        'alert': []
    }
    rules_loader = FileRulesLoader({})
    rules_loader.load_modules(rule)
    alert = TelegramAlerter(rule)
    match = {'@timestamp': '2021-01-01T00:00:00', 'somefield': 'foobarbaz'}
    with mock.patch('requests.post') as mock_post_request:
        alert.alert([match])

    expected_data = {
        'chat_id': rule['telegram_room_id'],
        'text': '⚠ *Test Telegram Rule* ⚠ ```\nTest Telegram Rule\n\n@timestamp: 2021-01-01T00:00:00\nsomefield: foobarbaz\n ```',
        'parse_mode': 'markdown',
        'disable_web_page_preview': True
    }

    mock_post_request.assert_called_once_with(
        'https://api.telegram.org/botxxxxx1/sendMessage',
        data=mock.ANY,
        headers={'content-type': 'application/json'},
        proxies={'https': 'http://proxy.url'},
        auth=HTTPProxyAuth('admin', 'password'))

    actual_data = json.loads(mock_post_request.call_args_list[0][1]['data'])
    assert expected_data == actual_data
def _setup(self):
    plugins = []  # fixed typo: was `plugings`
    cache_conf = NoCache()
    if Configuration.debug:
        if Configuration.log_file is not None:
            logging.basicConfig(filename=Configuration.log_file,
                                level=Configuration.log_level,
                                format=utils.get_log_format())
        else:
            logging.basicConfig(level=Configuration.log_level,
                                format=utils.get_log_format())
        if Configuration.log_level == logging.DEBUG and Configuration.environment == constants.PRODUCTION_ENV:
            raise LogException
        plugins.append(LogPlugin())

    s = requests.Session()
    s.mount('file://', FileAdapter())
    if Configuration.proxy_url:
        s.proxies = utils.get_builded_proxy_url(Configuration.proxy_url, Configuration.proxy_port)
    if Configuration.proxy_user:
        s.auth = HTTPProxyAuth(Configuration.proxy_user, Configuration.proxy_pass)
    if Configuration.certificate and Configuration.c_key:
        s.cert = (Configuration.certificate, Configuration.c_key)
    else:
        s.verify = Configuration.certificate

    t = RequestsTransport(s, timeout=Configuration.timeout)
    if Configuration.cache:
        cache_conf = ObjectCache(location=Configuration.cache_location,
                                 seconds=Configuration.cache_duration)
    self._client = client.Client(Configuration.get_wsdl().strip(),
                                 plugins=plugins,
                                 transport=t,
                                 cache=cache_conf)
def get_img(self, url):
    self.headers['User-Agent'] = random.choice(self.ua_pool)
    s = requests.session()
    s.proxies = self.proxies
    s.auth = HTTPProxyAuth(self.proxyUser, self.proxyPass)
    if url.startswith('https'):
        url = "http://" + url[8:]
        self.headers['x-crawlera-use-https'] = "1"
    s.headers = self.headers
    # delay
    # start = time.time()
    # dur = start - self.curent
    # if start - self.curent < 0.3:
    #     time.sleep(dur)
    try:
        data = s.get(url, timeout=15).content
        # self.curent = start
        return data
    except Exception as e:
        print(e)  # fixed: Python 3 print call
        return False
    finally:
        s.close()
def __init__(self):
    self.option = Options()
    self.option.add_argument('--headless')
    self.option.add_argument('--disable-gpu')
    self.driver = webdriver.Chrome(options=self.option)
    self.driver.set_script_timeout(1)
    self.driver.set_page_load_timeout(10)
    # Shared folder\python packaging\weibo.com no-account screenshots\dist
    # self.open_html_url = 'http://localhost:63342/Jeqee热门/spider/weibo/微博Com截图(代理)/weibocom_shot/exe文件/dist/'
    self.open_html_url = 'http://localhost:63342/Jeqee热门/spider/weibo/微博Com截图(代理)/weibocom_shot/'
    self.auth = HTTPProxyAuth('jeqee', 'jeqeeproxy')
    self.headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36',
        'Host': 's.weibo.com'
    }
    self.has_cookie_headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36',
        'Host': 's.weibo.com',
        'Cookie': 'SINAGLOBAL=2079748157535.4148.1562656336661; _s_tentry=-; Apache=6138501991200.746.1578878421920; ULV=1578878421977:15:2:1:6138501991200.746.1578878421920:1577966277068; login_sid_t=4041ea8280741ef6a60c24968bf7ed52; cross_origin_proto=SSL; WBtopGlobal_register_version=307744aa77dd5677; secsys_id=d3e0da3cb4c0fb539669ba32cf3d0751; ALF=1610614524; SSOLoginState=1579078525; SCF=AncjsCf7zbAbUzNBAKOizieYzy1LkJJc5eum43dQUuxtx1pHo_67XdOiZfYQ5pr9nZip8tM5XaA6dshnDiGWE1s.; SUB=_2A25zGqMmDeRhGeRG4loZ8S_LzTmIHXVQUZPurDV8PUNbmtAfLU7mkW9NTeZINGNGUkBCRAcmRLrsnuAuL4q2BGyT; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WFOfqD9fkE8ALcVFJfdDIpl5JpX5KzhUgL.FozR1KnReK2NSo-2dJLoIfQLxK-L12qL1KqLxKBLBonLB-2LxK-L1K5L12BLxK-LB-BL1KMLxKBLBo.L1-qLxK-LB.-L1hnLxK.L1-2LB.-LxK-L1K-L122LxKqL1hnL1K2LxK-L12-LB.zt; SUHB=0S7yje5GVwlj4F; wvr=6; UOR=,,v3.jqsocial.com; webim_unReadCount=%7B%22time%22%3A1579140897294%2C%22dm_pub_total%22%3A5%2C%22chat_group_client%22%3A0%2C%22allcountNum%22%3A43%2C%22msgbox%22%3A0%7D; WBStorage=42212210b087ca50|undefined'
    }
    # Initialize the logger
    self.logger = logging.getLogger('weibo_com')
    self.logger.setLevel(logging.DEBUG)
    self.logger.addHandler(fh)
def _to_server(self, logVal):
    if self.conf['protocol'] is not None:
        c_url = self.conf['protocol'] + "://" + self.conf['address'] + ":" + str(self.conf['port'])
    else:
        c_url = self.conf['address'] + ":" + str(self.conf['port'])
    utils.ilog(self.LOG_CLASS, "Sending json to: " + c_url)
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36"
    }
    if 'use_proxy' in self.conf and self.conf['use_proxy'] is True:
        if "proxy_auth" in self.conf:
            cauth = HTTPProxyAuth(self.conf['proxy_auth']['username'],
                                  self.conf['proxy_auth']['password'])
            r = requests.put(c_url, json=logVal, proxies=self.conf['proxies'],
                             auth=cauth, headers=headers)
        else:
            r = requests.put(c_url, json=logVal, proxies=self.conf['proxies'],
                             headers=headers)
    else:
        r = requests.put(c_url, json=logVal, headers=headers)
    utils.ilog(self.LOG_CLASS, "Status Code: " + str(r.status_code), imp=True)
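# Sketch of the self.conf structure that _to_server above reads; the host,
# port, and credentials are placeholders, not values from the original source.
conf = {
    'protocol': 'https',
    'address': 'logs.example.com',
    'port': 8443,
    'use_proxy': True,
    'proxies': {'http': 'http://proxy.example.com:3128',
                'https': 'http://proxy.example.com:3128'},
    'proxy_auth': {'username': 'user', 'password': 'secret'},
}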
def alert(self, matches):
    body = '⚠ *%s* ⚠ ```\n' % (self.create_title(matches))
    for match in matches:
        body += str(BasicMatchString(self.rule, match))
        # Separate text of aggregated alerts with dashes
        if len(matches) > 1:
            body += '\n----------------------------------------\n'
    if len(body) > 4095:
        body = body[0:4000] + "\n⚠ *message was cropped according to telegram limits!* ⚠"
    body += ' ```'

    headers = {'content-type': 'application/json'}
    # Set the https proxy, if one was provided
    proxies = {'https': self.telegram_proxy} if self.telegram_proxy else None
    auth = HTTPProxyAuth(self.telegram_proxy_login, self.telegram_proxy_password) if self.telegram_proxy_login else None
    payload = {
        'chat_id': self.telegram_room_id,
        'text': body,
        'parse_mode': 'markdown',
        'disable_web_page_preview': True
    }

    try:
        response = requests.post(self.url, data=json.dumps(payload, cls=DateTimeEncoder),
                                 headers=headers, proxies=proxies, auth=auth)
        warnings.resetwarnings()
        response.raise_for_status()
    except RequestException as e:
        raise EAException("Error posting to Telegram: %s. Details: %s" % (
            e, "" if e.response is None else e.response.text))

    elastalert_logger.info("Alert sent to Telegram room %s" % self.telegram_room_id)
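# For context on the auth= argument used above: in requests, HTTPProxyAuth is
# a small HTTPBasicAuth subclass that writes the credentials into the
# Proxy-Authorization header (consumed by the proxy) instead of Authorization
# (consumed by the target server). A minimal re-statement of that idea,
# mirroring rather than importing the requests internals:
import base64
from requests.auth import HTTPBasicAuth

class ProxyAuthSketch(HTTPBasicAuth):
    def __call__(self, r):
        token = base64.b64encode(f"{self.username}:{self.password}".encode()).decode()
        r.headers["Proxy-Authorization"] = "Basic " + token
        return r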
def getresults():
    echo("getting results")
    headers = {
        'token': apikey,
    }
    params = (
        ('test_run', run_id),
    )
    print(params)
    print(headers)
    if proxy == "":
        response = requests.get(url + '/api/external/output/', headers=headers, params=params)
    else:
        httpproxy = "http://" + proxy
        httpsproxy = "https://" + proxy
        proxies = {"http": httpproxy, "https": httpsproxy}
        if username == "":
            response = requests.get(url + '/api/external/output/', headers=headers,
                                    params=params, proxies=proxies)
        else:
            auth = HTTPProxyAuth(username, password)
            response = requests.get(url + '/api/external/output/', headers=headers,
                                    params=params, proxies=proxies, auth=auth)
    print("result request sent")

    resultset = ""
    if response.status_code >= 500:
        print('[!] [{0}] Server Error'.format(response.status_code))
        return None
    elif response.status_code == 404:
        print('[!] [{0}] URL not found: [{1}]'.format(response.status_code, api_url))
        return None
    elif response.status_code == 401:
        print('[!] [{0}] Authentication Failed'.format(response.status_code))
        return None
    elif response.status_code == 400:
        print('[!] [{0}] Bad Request'.format(response.status_code))
        return None
    elif response.status_code >= 300:
        print('[!] [{0}] Unexpected Redirect'.format(response.status_code))
        return None
    elif response.status_code == 200:
        resultset = json.loads(response.content.decode('utf-8'))
        echo(resultset)
    else:
        print('[?] Unexpected Error: [HTTP {0}]: Content: {1}'.format(response.status_code, response.content))

    if resultset["new_defects"] and "newdefects" in fail:
        exit(1)
    if resultset["reopened_defects"] != 0 and "reopeneddefects" in fail:
        exit(1)
    if resultset["flaky_defects"] != 0 and "newflaky" in fail:
        exit(1)
    if resultset["reopened_flaky_defects"] != 0 and "reopenedflaky" in fail:
        exit(1)
    if resultset["flaky_failures_breaks"] != 0 and "flakybrokentests" in fail:
        exit(1)
    if resultset["failed_test"] != 0 and "failedtests" in fail:
        exit(1)
    if resultset["broken_test"] != 0 and "brokentests" in fail:
        exit(1)
def make_request(urls):
    headerList = [
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
        'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:53.0) Gecko/20100101 Firefox/53.0'
    ]
    proxyList = proxylst
    auth = HTTPProxyAuth(proxy_login, proxy_pass)
    r = (grequests.get(u,
                       headers={'User-Agent': sys_random.choice(headerList)},
                       # fixed key: 'http:' (with the stray colon) would be silently ignored
                       proxies={'http': sys_random.choice(proxyList)},
                       auth=auth,
                       stream=True) for u in urls)
    return grequests.map(r)
def __init__(self, **kwargs):
    self.business_page_url = kwargs['url']
    self.bot_type = kwargs['bot']
    self.is_timeout = False

    # Set generic fields directly (they don't need to be computed by the scrapers).
    # Note: this needs to be done before merging with DATA_TYPES, below, so that
    # BASE_DATA_TYPES values can be overwritten by DATA_TYPES values if needed
    # (more specifically, to overwrite the functions for extracting certain data,
    # especially sellers-related fields).
    self.proxy_config = None
    if kwargs.get('proxies'):
        self.proxy_config = kwargs['proxies']
        proxy_host = self.proxy_config["host"]
        proxy_port = self.proxy_config["port"]
        self.proxy_auth = HTTPProxyAuth(self.proxy_config["apikey"], "")
        self.proxies = {
            "http": "http://{}:{}/".format(proxy_host, proxy_port)
        }

    self.headers = {
        'user-agent': 'Mozilla/5.0 (Linux; Android 5.1.1; YQ601 Build/LMY47V) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/39.0.0.0 Mobile Safari/537.36'
    }

    # Update the data types dictionary to overwrite the names of the implementing
    # methods for each data type with the implementing function from the subclass.
    # Precautionary measure in case one of the dicts is not defined in a scraper:
    if not hasattr(self, "DATA_TYPES"):
        self.DATA_TYPES = {}
    if not hasattr(self, "DATA_TYPES_SPECIAL"):
        self.DATA_TYPES_SPECIAL = {}
    self.ALL_DATA_TYPES = self.BASE_DATA_TYPES.copy()
    self.ALL_DATA_TYPES.update(self.DATA_TYPES)
    self.ALL_DATA_TYPES.update(self.DATA_TYPES_SPECIAL)
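# Sketch of the kwargs the constructor above expects; the host, port, and API
# key are placeholders. HTTPProxyAuth(apikey, "") sends the key as the proxy
# username with an empty password, the Crawlera-style convention.
kwargs = {
    'url': 'https://example.com/business/123',
    'bot': 'crawler',
    'proxies': {'host': 'proxy.example.com', 'port': 8010, 'apikey': 'YOUR_API_KEY'},
}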
def login(self, user_key, user_secret, verify_credentials):
    if config.app["proxy"]["server"] != "" and config.app["proxy"]["port"] != "":
        args = {
            "proxies": {
                "http": "http://{0}:{1}".format(config.app["proxy"]["server"], config.app["proxy"]["port"]),
                "https": "https://{0}:{1}".format(config.app["proxy"]["server"], config.app["proxy"]["port"])
            }
        }
        if config.app["proxy"]["user"] != "" and config.app["proxy"]["password"] != "":
            auth = HTTPProxyAuth(config.app["proxy"]["user"], config.app["proxy"]["password"])
            args["auth"] = auth
        self.twitter = Twython(keyring.get("api_key"), keyring.get("api_secret"),
                               user_key, user_secret, client_args=args)
    else:
        self.twitter = Twython(keyring.get("api_key"), keyring.get("api_secret"),
                               user_key, user_secret)
    if verify_credentials == True:
        self.credentials = self.twitter.verify_credentials()
def alert(self, matches):
    body = ''
    for match in matches:
        body += str(BasicMatchString(self.rule, match))
        if len(matches) > 1:
            body += '\n----------------------------------------\n'
    if len(body) > 2047:
        body = body[0:1950] + '\n *message was cropped according to chatwork embed description limits!*'

    headers = {'X-ChatWorkToken': self.chatwork_apikey}
    # Set the https proxy, if one was provided
    proxies = {'https': self.chatwork_proxy} if self.chatwork_proxy else None
    auth = HTTPProxyAuth(self.chatwork_proxy_login, self.chatwork_proxy_pass) if self.chatwork_proxy_login else None
    params = {'body': body}

    try:
        response = requests.post(self.url, params=params, headers=headers,
                                 proxies=proxies, auth=auth)
        response.raise_for_status()
    except RequestException as e:
        raise EAException("Error posting to Chatwork: %s. Details: %s" % (
            e, "" if e.response is None else e.response.text))

    elastalert_logger.info("Alert sent to Chatwork room %s" % self.chatwork_room_id)
def with_ven_anonymizer(cls):
    return Browser(
        proxies={'http': get_environment_variable('VEN_ANONYMIZER_PROX_HTTP')},
        auth=HTTPProxyAuth(get_environment_variable('VEN_ANONYMIZER_LOGIN'),
                           get_environment_variable('VEN_ANONYMIZER_PASS')))
def test_client_works_invalid_proxy(self):
    proxies = {'https': '0.0.0.0:0'}
    proxy_auth = HTTPProxyAuth("test", "test")
    client = configcatclient.create_client_with_auto_poll(_SDK_KEY,
                                                          proxies=proxies,
                                                          proxy_auth=proxy_auth)
    self.assertEqual('default value', client.get_value('keySampleText', 'default value'))
    client.stop()
def test_dingtalk_proxy():
    rule = {
        'name': 'Test DingTalk Rule',
        'type': 'any',
        'dingtalk_access_token': 'xxxxxxx',
        'dingtalk_msgtype': 'action_card',
        'dingtalk_single_title': 'elastalert',
        'dingtalk_single_url': 'http://xxxxx2',
        'dingtalk_btn_orientation': '1',
        'dingtalk_btns': [
            {
                'title': 'test1',
                'actionURL': 'https://xxxxx0/'
            },
            {
                'title': 'test2',
                'actionURL': 'https://xxxxx1/'
            }
        ],
        'dingtalk_proxy': 'http://proxy.url',
        'dingtalk_proxy_login': '******',
        'dingtalk_proxy_pass': '******',
        'alert': [],
        'alert_subject': 'Test DingTalk'
    }
    rules_loader = FileRulesLoader({})
    rules_loader.load_modules(rule)
    alert = DingTalkAlerter(rule)
    match = {
        '@timestamp': '2021-01-01T00:00:00',
        'somefield': 'foobarbaz'
    }
    with mock.patch('requests.post') as mock_post_request:
        alert.alert([match])

    expected_data = {
        'msgtype': 'actionCard',
        'actionCard': {
            'title': 'Test DingTalk',
            'text': 'Test DingTalk Rule\n\n@timestamp: 2021-01-01T00:00:00\nsomefield: foobarbaz\n',
            'btnOrientation': rule['dingtalk_btn_orientation'],
            'btns': rule['dingtalk_btns']
        }
    }

    mock_post_request.assert_called_once_with(
        'https://oapi.dingtalk.com/robot/send?access_token=xxxxxxx',
        data=mock.ANY,
        headers={
            'Content-Type': 'application/json',
            'Accept': 'application/json;charset=utf-8'
        },
        proxies={'https': 'http://proxy.url'},
        auth=HTTPProxyAuth('admin', 'password')
    )

    actual_data = json.loads(mock_post_request.call_args_list[0][1]['data'])
    assert expected_data == actual_data
def invoke(self, request, method):
    try:
        if not isinstance(request, HttpRequest):
            raise HttpException('Invalid HTTP request object.', 400)
        elif request.uri is None:
            raise HttpException('Invalid HTTP request URI.', 400)

        # Use proxies and credentials if provided
        proxies = None
        auth = None
        if request.useProxy is True:
            proxies = request.proxies
            if request.credentials is not None:
                auth = HTTPProxyAuth(request.credentials.username, request.credentials.password)

        r = None
        if method == HttpMethod.GET:
            r = requests.get(url=request.uri, params=request.queryParams,
                             headers=request.headers, proxies=proxies, auth=auth)
        elif method == HttpMethod.POST:
            r = requests.post(url=request.uri, data=json.dumps(request.payload),
                              headers=request.headers, proxies=proxies, auth=auth)
        self.__log(r)

        # Handle any error that occurred
        if r.status_code >= 400:
            raise HttpException(r.message, r.status_code)
        return HttpResponse(r.text)
    except HttpException as ex:
        # fixed: both format arguments belong in one tuple for the % operator
        self.__logErr('Error: %s (Code: %d)' % (ex.message, ex.erorCode))
        return HttpResponse(payload=ex.message, status=False)
    except requests.exceptions.ConnectionError as err:
        self.__logErr('Error: %s' % err.args[0])
        return HttpResponse(payload='Connection error', status=False)
    except requests.exceptions.Timeout as err:
        self.__logErr('Error: %s' % str(err))
        return HttpResponse(payload='Connection timed out', status=False)
    except requests.exceptions.TooManyRedirects as err:
        self.__logErr('Error: %s' % str(err))
        return HttpResponse(payload='Too many redirects', status=False)
    except requests.exceptions.RequestException as err:
        self.__logErr('Error: %s' % str(err))
        return HttpResponse(payload='Web request exception', status=False)
def proxy_load():
    try:
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        s.connect(('8.8.8.8', 80))
        ip = s.getsockname()[0]
    finally:
        s.close()

    proxies = ''
    if ip[0:7] == '10.244.':
        # At the office
        proxies = {}
        auth = HTTPProxyAuth()
        # proxies = {}
        # auth = None
    else:
        proxies = {}
        auth = HTTPProxyAuth()
    return proxies, auth
def __init__(self, api_key, user_agent=None, cert='crawlera-ca.crt', log=False):
    self.user_agent = user_agent
    self.api_key = api_key
    self.cert = cert
    self.log = log
    self.proxy_auth = HTTPProxyAuth(self.api_key, "")
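# Hedged usage of the client above: Crawlera (now Zyte Smart Proxy Manager)
# authenticates with the API key as the proxy username and an empty password.
# The class name, API key, and target URL below are placeholders; the
# proxy.crawlera.com:8010 endpoint matches Crawlera's documented address.
import requests

client = CrawleraClient(api_key="YOUR_API_KEY")  # hypothetical class name
proxies = {"http": "http://proxy.crawlera.com:8010",
           "https": "http://proxy.crawlera.com:8010"}
r = requests.get("https://example.com",
                 proxies=proxies,
                 auth=client.proxy_auth,  # API key as username, empty password
                 verify=client.cert)      # CA bundle shipped with Crawlera
print(r.status_code)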
def _get(self, url):
    # url = "https://google.com"
    auth = HTTPProxyAuth("customero", "iUyET3ErxR")
    proxies = {"http": "paygo.crawlera.com:8010"}
    r = requests.get(url,
                     # headers=headers,
                     proxies=proxies,
                     # timeout=timeout,
                     auth=auth)
    return r
def getCrawleraParsedSource(url, source_url, crawlera):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
    }
    tryout = 1
    while True:
        proxy_auth = HTTPProxyAuth("xxxxx", "")
        proxy = {
            "https": "https://xxxx:@proxy.crawlera.com:8010",
            "http": "http://xxxx:@proxy.crawlera.com:8010"
        }
        headers = {'User-Agent': 'Mozilla/5.0', 'X-Crawlera-Use-HTTPS': '1'}
        s = requests.Session()
        s.mount('http://', HTTPAdapter(max_retries=1))
        s.mount('https://', HTTPAdapter(max_retries=1))
        try:
            if crawlera is True:
                r = s.get(url, headers=headers, timeout=50, proxies=proxy, verify='xxx-ca.crt')
                # r = s.get(url, headers=headers, timeout=30, proxies=proxy, auth=proxy_auth, verify='xxxx-ca.crt')
            else:
                r = s.get(url, headers=headers, timeout=30, verify=False)
            print(r.status_code, url)
            if r.status_code == 200:
                content = (r.content).decode('UTF-8', 'ignore')
                ps = html.fromstring(content, source_url)
                ps.make_links_absolute()
                return ps, content  # html.fromstring(content)
            elif r.status_code == 404:
                return None, None
            else:
                pass
        except Exception as E:
            print(E)
            return None, None
        tryout += 1
        if crawlera is False:
            if tryout == 3:
                return None, None
def __init__(self, app_key, app_secret, oauth_token, oauth_token_secret, sessionObject, *a, **kw):
    args = None
    if config.app["proxy"]["server"] != "" and config.app["proxy"]["port"] != "":
        args = {"proxies": {"http": "http://{0}:{1}".format(config.app["proxy"]["server"], config.app["proxy"]["port"]),
                            "https": "https://{0}:{1}".format(config.app["proxy"]["server"], config.app["proxy"]["port"])}}
        if config.app["proxy"]["user"] != "" and config.app["proxy"]["password"] != "":
            auth = HTTPProxyAuth(config.app["proxy"]["user"], config.app["proxy"]["password"])
            args["auth"] = auth
    super(streamer, self).__init__(app_key, app_secret, oauth_token, oauth_token_secret,
                                   client_args=args, *a, **kw)
    self.session = sessionObject
    self.muted_users = self.session.db["muted_users"]
def __init__(self, app_key, app_secret, oauth_token, oauth_token_secret, timeout=300,
             retry_count=None, retry_in=10, client_args=None, handlers=None,
             chunk_size=1, session=None):
    self.session = session
    args = None
    if config.app["proxy"]["server"] != "" and config.app["proxy"]["port"] != "":
        args = {"proxies": {"http": "http://{0}:{1}".format(config.app["proxy"]["server"], config.app["proxy"]["port"]),
                            "https": "https://{0}:{1}".format(config.app["proxy"]["server"], config.app["proxy"]["port"])}}
        if config.app["proxy"]["user"] != "" and config.app["proxy"]["password"] != "":
            auth = HTTPProxyAuth(config.app["proxy"]["user"], config.app["proxy"]["password"])
            args["auth"] = auth
    super(timelinesStreamer, self).__init__(app_key, app_secret, oauth_token, oauth_token_secret,
                                            timeout=60, retry_count=None, retry_in=180,
                                            client_args=args, handlers=None, chunk_size=1)
    self.lists = self.session.lists
def check_ip_list(self, ip, options=None, *args, **kwargs):
    '''
    Checks a given IP against the blacklist

    :param self:
    :param ip: The IP address that we want to look for
    :param options: The OptParse options from the main() function
    :return Found|No Result:
    '''
    session = requests.Session()

    # Skip the feed if it is disabled in config
    if hasattr(self, "disabled") and self.disabled:
        return "Skipped - Disabled"

    # If the user supplied a proxy, set the proxy information for requests
    if options.proxy:
        session.proxies = {"http": options.proxy, "https": options.proxy}
        session.auth = HTTPProxyAuth(options.proxy_user, options.proxy_pass)

    # Try to pull down the data from the feed URL
    try:
        result = session.get(self.url)
        if result.status_code == 200:
            # If the threat feed is in CIDR notation, pull all the listed subnets,
            # then see if the IP is a member of each one; stop checking once found.
            # If NOT in CIDR notation, do the normal IP check.
            if self.format == "cidr":
                for cidr in [IPNetwork(cidr) for cidr in re.findall(
                        "((?:\d{1,3}\.){3}\d{1,3}(?:/\d\d?))", result.content)]:
                    if IPAddress(ip) in cidr:
                        return "Found"
                return "No Result"
            else:
                matches = re.findall(ip, result.content)
                if matches:
                    return "Found"
                else:
                    return "No Result"
        else:
            cprint("[!] There was an issue attempting to connect to: {url}".format(url=self.url), RED)
            return "Error"
    except rexp.ConnectionError as e:
        cprint("[!] There was an issue attempting to connect to: {url}".format(url=self.url), RED)
        return "Error"
def generate_proxy_auth(self):
    username = ''
    username_prefix = 'lum-customer-%s-zone-gen' % self.username  # fixed typo: was `username_prifix`
    username += username_prefix
    if self.country:
        username += '-country-%s' % self.country
    if self.dns:
        username += '-dns-%s' % self.dns
    proxy_session_tag = 'sid%s_%s' % (self.session, self.base)
    username += '-session-%s' % proxy_session_tag
    authentication_code = self.password
    self.proxy_auth = HTTPProxyAuth(username, authentication_code)
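# For illustration, generate_proxy_auth above builds a Luminati (Bright Data)
# style proxy username by chaining routing tags onto the customer/zone prefix.
# With placeholder values, the result looks like this:
username = 'lum-customer-acme-zone-gen'  # 'acme' stands in for self.username
username += '-country-us'                # only added if self.country is set
username += '-session-sid42_7'           # sid<session>_<base>
print(username)  # lum-customer-acme-zone-gen-country-us-session-sid42_7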
def __init__(self):
    self.tmall_url = 'https://detail.tmall.com/item.htm?id={}'
    self.taobao_goods_url = 'https://item.taobao.com/item.htm?ft=t&id={}'
    self.proxy = {'http': 'http-dyn.abuyun.com:9020'}
    self.auth = HTTPProxyAuth('HG3T29V0U33H432D', 'CF9328D54686ED24')
    self.headers = {
        # 'accept-encoding': 'gzip, deflate, br',
        # 'accept-language': 'zh-CN,zh;q=0.9',
        # 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36',
        # 'Cookie': 'enc=VfS9pXV44E9bUOwMfVUhsI2f5KZoEWQRTTlmnAHeMuXbeGacMJCCOJLBAJ3Yg9wnm1mkcs7CYgKbLwwkj7b6zw%3D%3D;'
    }
def alert(self, matches):
    body = ''
    title = u'%s' % (self.create_title(matches))
    for match in matches:
        body += str(BasicMatchString(self.rule, match))
        if len(matches) > 1:
            body += '\n----------------------------------------\n'
    if len(body) > 2047:
        body = body[0:1950] + '\n *message was cropped according to discord embed description limits!*'

    proxies = {'https': self.discord_proxy} if self.discord_proxy else None
    auth = HTTPProxyAuth(self.discord_proxy_login, self.discord_proxy_password) if self.discord_proxy_login else None
    headers = {"Content-Type": "application/json"}

    data = {}
    data["content"] = "%s %s %s" % (self.discord_emoji_title, title, self.discord_emoji_title)
    data["embeds"] = []
    embed = {}
    embed["description"] = "%s" % (body)
    embed["color"] = self.discord_embed_color
    if self.discord_embed_footer:
        embed["footer"] = {}
        embed["footer"]["text"] = self.discord_embed_footer if self.discord_embed_footer else None
        embed["footer"]["icon_url"] = self.discord_embed_icon_url if self.discord_embed_icon_url else None
    data["embeds"].append(embed)

    try:
        response = requests.post(self.discord_webhook_url, data=json.dumps(data),
                                 headers=headers, proxies=proxies, auth=auth)
        warnings.resetwarnings()
        response.raise_for_status()
    except RequestException as e:
        raise EAException("Error posting to Discord: %s. Details: %s" % (
            e, "" if e.response is None else e.response.text))

    elastalert_logger.info("Alert sent to the webhook %s" % self.discord_webhook_url)
def getProxy():
    url = 'http://api.xicidaili.com/free2016.txt'
    proxy_str = 'http://192.168.107.27:8080'
    proxies = {"http": proxy_str, "https": proxy_str}
    # url = 'http://www.xicidaili.com/nt/%d' % page

    session = requests.session()
    session.proxies = proxies
    session.auth = HTTPProxyAuth('zhang_qiang_neu', '3edc#EDC')
    session.trust_env = False
    html = session.get(url).content
    # html = requests.request('GET', url, headers=headers, proxies=proxies)
    print(html)
def _read_url(url, HTTPS=False):
    proxy_host = "proxy.crawlera.com"
    proxy_port = "8010"
    proxy_auth = HTTPProxyAuth("adb5925b628547c6b17135ff6237f87f", "")
    proxies = {"https": "https://{}:{}/".format(proxy_host, proxy_port)}
    response = requests.get(url, headers=REQUEST_HEADERS, verify=False,
                            proxies=proxies, auth=proxy_auth, timeout=5.0)
    return response
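# Hedged usage sketch for _read_url above; REQUEST_HEADERS is assumed to be a
# module-level dict (the snippet references it), and example.com is a
# placeholder target.
REQUEST_HEADERS = {"User-Agent": "Mozilla/5.0"}
resp = _read_url("https://example.com")
print(resp.status_code, len(resp.content))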