def pullid():
    """Search LinkedIn for the module-level `company` keyword, print each
    matching company's numeric ID, and prompt the user to pick one.

    Side effects: performs the anonymous + authenticated request dance via
    initialReq()/authReq(), then stores the chosen ID in the `companyid`
    global. Relies on module globals: company, companyid, ssl_validation,
    user_agent, timeout.
    """
    global company
    global companyid
    cookiejar = initialReq()
    cookiejar = authReq(cookiejar)
    # ssl_validation truthy -> stock handlers (default cert checking);
    # otherwise use the relaxed context from sslvalidation().
    if (ssl_validation):
        opener = build_opener(HTTPCookieProcessor(cookiejar), HTTPHandler())
    else:
        opener = build_opener(HTTPCookieProcessor(cookiejar), HTTPHandler(),
                              HTTPSHandler(context=sslvalidation()))
    query = "count=10&filters=List(resultType-%3ECOMPANIES)&" + urlencode(
        {"keywords": company}
    ) + "&origin=SWITCH_SEARCH_VERTICAL&q=all&queryContext=List(spellCorrectionEnabled-%3Etrue,relatedSearchesEnabled-%3Efalse)&start=0"
    headers = {
        "Host": "www.linkedin.com",
        "User-Agent": user_agent,
        "Accept": "application/vnd.linkedin.normalized+json+2.1",
        "x-restli-protocol-version": "2.0.0",
        "Cookie": getcookie(cookiejar),
        "Csrf-Token": ajaxtoken(cookiejar),
    }
    req = Request(
        "https://www.linkedin.com/voyager/api/search/blended?" + query,
        None, headers)
    data = opener.open(req, timeout=timeout).read()
    content = json.loads(data)
    for companyname in content["included"]:
        # entityUrn is colon-delimited with the numeric ID in field 3.
        # FIX: renamed from `id`, which shadowed the builtin.
        urn_parts = companyname["entityUrn"].split(":")
        print("{:.<40}: {:s}".format(companyname["name"] + " :", urn_parts[3]))
    companyid = input("\nSelect company ID value: ")
def authReq(cookiejar):
    """Submit LinkedIn credentials (module globals linkedin_username /
    linkedin_password) via the login-submit endpoint and return the
    now-authenticated cookiejar.
    """
    if (ssl_validation):
        opener = build_opener(HTTPCookieProcessor(cookiejar), HTTPHandler())
    else:
        opener = build_opener(HTTPCookieProcessor(cookiejar), HTTPHandler(),
                              HTTPSHandler(context=sslvalidation()))
    lcsrf = logincsrf(cookiejar)
    if (lcsrf is None):
        # Warn but continue; the POST below will simply fail to authenticate.
        print(bcolors.NONERED + '[-] Failed to pull CSRF token' + bcolors.ENDLINE)
    data = urlencode({
        "session_key": linkedin_username,
        "session_password": linkedin_password,
        "isJsEnabled": "false",
        "loginCsrfParam": lcsrf
    }).encode("utf-8")
    headers = {
        "Host": "www.linkedin.com",
        "User-Agent": user_agent,
        "Content-type": "application/x-www-form-urlencoded",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Cookie": getcookie(cookiejar),
        "X-IsAJAXForm": "1",
    }
    # BUG FIX: Request(url, headers) passed the header dict as the *data*
    # positional argument, so the headers were never sent (the real body is
    # supplied via opener.open(..., data=data)). Pass None for data and the
    # dict as the headers argument, like the sibling request helpers.
    req = Request("https://www.linkedin.com/uas/login-submit", None, headers)
    f = opener.open(req, timeout=timeout, data=data)
    return cookiejar
def curl(self, url, params=None, timeout=None):
    """POST `params` (built into a query string) to `url` and return the
    UTF-8 decoded response body.

    Temporarily installs an opener — optionally routed through self._proxy —
    as the process-wide urllib opener, then restores the previous one.
    """
    queryString = self.buildQS(params)
    self.traceField("URL", url)
    self.traceField("postData", queryString)
    proto = "https" if self._useHTTPS else "http"
    if self._proxy:
        opener = build_opener(HTTPHandler(), ValidHTTPSHandler(), ProxyHandler({proto: self._proxy}))
    else:
        opener = build_opener(HTTPHandler(), ValidHTTPSHandler())
    queryString = queryString.encode('utf-8')
    # NOTE(review): reaches into the private `_opener` module attribute to
    # save the currently-installed global opener so it can be restored below.
    currentOpener = urllib.request._opener if PY_3 else urllib2._opener
    install_opener(opener)
    if timeout:
        response = urlopen(url, queryString, timeout)
    else:
        response = urlopen(url, queryString)
    install_opener(currentOpener)
    result = response.read()
    result = result.decode('utf-8')
    return result
def initialReq():
    """Fetch linkedin.com anonymously to seed a fresh CookieJar; return it.

    Uses module globals ssl_validation, user_agent, timeout.
    """
    cookiejar = CookieJar()
    # BUG FIX: the condition was the string literal 'ssl_validation' (always
    # truthy, so the HTTPS branch was dead). Test the module flag like the
    # sibling helpers (authReq/pullid/recon) do.
    if (ssl_validation):
        opener = build_opener(HTTPCookieProcessor(cookiejar), HTTPHandler())
    else:
        opener = build_opener(HTTPCookieProcessor(cookiejar), HTTPHandler(),
                              HTTPSHandler(context=sslvalidation()))
    headers = {
        "Host": "www.linkedin.com",
        # BUG FIX: was the non-standard key "Agent"; siblings send "User-Agent".
        "User-Agent": user_agent,
    }
    # BUG FIX: the headers dict was built but never attached to the request.
    req = Request("https://www.linkedin.com", None, headers)
    f = opener.open(req, timeout=timeout)
    return cookiejar
def get(url):
    """GET `url` and return a Response object whose attributes include
    headers (dict), code (int), text (str), body (bytes) and encoding.
    """
    opener = build_opener(HTTPHandler())
    resp = opener.open(url)
    headers = dict(resp.getheaders())
    # Derive the charset from "Content-Type: ...; charset=<enc>";
    # fall back to utf-8 when the header is absent.
    try:
        encoding = headers['Content-Type'].split('=')[-1]
    except KeyError:  # FIX: was a bare except; only a missing header is expected
        encoding = 'utf-8'
    code = resp.code
    body = resp.read()
    text = body.decode(encoding)
    return Response(headers=headers, encoding=encoding, code=code,
                    body=body, text=text)
def http_download(download_url, outfile, proxy_url=None, proxy_port=None):
    """Download `download_url` to `outfile` in 8 KiB chunks, optionally
    through an HTTP(S) proxy given as proxy_url/proxy_port.
    """
    if proxy_url:
        proxy = "{}:{}".format(proxy_url, proxy_port)
        mainlog.info("Using a proxy : {}".format(proxy))
        urlopener = build_opener(ProxyHandler({
            'https': proxy,
            'http': proxy
        }), HTTPRedirectHandler())
    else:
        mainlog.info("Not using a proxy")
        urlopener = build_opener(HTTPHandler(), HTTPSHandler(),
                                 HTTPRedirectHandler())
    urlopener.addheaders = [(
        'User-agent',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:32.0) Gecko/20100101 Firefox/32.0'
    )]
    datasource = urlopener.open(download_url)
    # FIX: the output file and HTTP response were closed manually and leaked
    # if read()/write() raised; `with`/`finally` guarantee cleanup.
    try:
        with open(outfile, 'wb') as out:
            while True:
                d = datasource.read(8192)
                # self.logger.debug("Downloaded {} bytes".format(len(d)))
                if not d:
                    break
                out.write(d)
                out.flush()
    finally:
        datasource.close()
def __init__(self,
             writing=WRITING_NATIVE,
             opener=None,
             retry_times=4,
             executor=_g_executor,
             timeout=4,
             service_urls=('http://translate.google.com', ),
             debug=False):
    """Initialise client state; build a debug-aware urllib opener when the
    caller did not supply one, and normalise `service_urls` to a sequence.
    """
    self._DEBUG = debug
    self._MIN_TASKS_FOR_CONCURRENT = 2
    self._languages = None
    self._TIMEOUT = timeout
    self._RETRY_TIMES = retry_times
    self._executor = executor
    self._writing = writing
    self._opener = opener
    if not self._opener:
        # debuglevel 1 makes urllib echo HTTP traffic when debug is on.
        level = 1 if self._DEBUG else 0
        self._opener = build_opener(HTTPHandler(debuglevel=level),
                                    HTTPSHandler(debuglevel=level))
    # A single URL is wrapped into a one-element tuple.
    self._service_urls = (service_urls if _is_sequence(service_urls)
                          else (service_urls, ))
def _setup_url_opener_if_necessary(self):
    """Lazily build self._url_opener for talking to self._servers.

    Idempotent: returns immediately if the opener already exists. Optionally
    disables SSL verification, and adds pre-emptive basic-auth handling for
    any server that carries a username.
    """
    if self._url_opener is not None:
        return
    kwargs = dict()
    # disable SSL verification if requested (only matters when at least one
    # configured server speaks https)
    if not self._verify_ssl_certificates:
        ssl_hosts = [
            server for server in self._servers
            if server.url.startswith('https')
        ]
        if ssl_hosts:
            context = ssl.create_default_context()
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
            kwargs['context'] = context
    # setup URL openers - add pre-emptive basic authentication
    http_handler = HTTPHandler()
    https_handler = HTTPSHandler(**kwargs)
    password_manager = HTTPPasswordMgrWithDefaultRealm()
    auth_handlers = []
    # setup auth handler if we have any servers requiring authentication
    for server in self._servers:
        if server.username:
            password_manager.add_password(None, server.url, server.username,
                                          server.password)
    # Only attach the auth handler when at least one credential was registered.
    if password_manager.passwd:
        auth_handler = PreemptiveBasicAuthHandler(password_manager)
        auth_handlers.append(auth_handler)
    self._url_opener = build_opener(http_handler, https_handler,
                                    *auth_handlers)
def get_asf_cookie(user, password):
    """Log in to ASF Earthdata with HTTP basic auth and store the session
    cookie in the module-level `cookie_jar` (saved to `cookie_jar_path`).

    Returns True on success, False on bad credentials or any HTTP failure.
    """
    logging.info("logging into asf")
    login_url = "https://urs.earthdata.nasa.gov/oauth/authorize"
    client_id = "BO_n7nTIlMljdvU6kRRB3g"
    redirect_url = "https://auth.asf.alaska.edu/login"
    user_pass = base64.b64encode(bytes(user + ":" + password, "utf-8"))
    user_pass = user_pass.decode("utf-8")
    auth_cookie_url = f"{login_url}?client_id={client_id}&redirect_uri={redirect_url}&response_type=code&state="
    context = {}
    opener = build_opener(HTTPCookieProcessor(cookie_jar), HTTPHandler(),
                          HTTPSHandler(**context))
    request = Request(auth_cookie_url,
                      headers={"Authorization": "Basic {0}".format(user_pass)})
    try:
        response = opener.open(request)
    except HTTPError as e:
        if e.code == 401:
            logging.error("invalid username and password")
            return False
        else:
            # If an error happens here, the user most likely has not confirmed EULA.
            # BUG FIX: HTTPError has no `.response` attribute — the original
            # raised AttributeError in this path. Use `.reason` instead.
            logging.error(f"Could not log in. {e.code} {e.reason}")
            return False
    if check_cookie_is_logged_in(cookie_jar):
        # COOKIE SUCCESS!
        cookie_jar.save(cookie_jar_path)
        logging.info("successfully logged into asf")
        return True
    logging.info("failed logging into asf")
    return False
def setup_method(self, method):
    """Per-test setup: build a cookie-aware opener (with redirect handling
    and silent debug levels) and launch the application under test in a
    separate process.
    """
    self.cookies = CookieJar()
    handler_chain = [
        HTTPRedirectHandler(),
        HTTPHandler(debuglevel=0),
        HTTPSHandler(debuglevel=0),
        HTTPCookieProcessor(self.cookies),
    ]
    self.opener = build_opener(*handler_chain)
    self.application_process = Process(target=main)
    self.application_process.start()
def __init__(self, elem=None, parent=None, method=lambda: None):
    """Store the node's element/parent/method links and lazily build the
    class-level shared opener on first construction.
    """
    self.elem = elem
    self.parent = parent
    self.method = method
    # The opener is shared by all EndPoint instances; build it exactly once.
    if EndPoint.opener is None:
        director = OpenerDirector()
        for handler in (HTTPHandler(), HTTPSHandler(context=CTX)):
            director.add_handler(handler)
        EndPoint.opener = director
def instanciate_template(tpl_id):
    """Ask the download site to instantiate template `tpl_id`; return the
    resulting document id as an int.
    """
    opener = build_opener(HTTPHandler(), HTTPSHandler())
    base_url = configuration.get("DownloadSite", "base_url")
    handle = opener.open("{}/instanciate_template?tpl_id={}".format(base_url, tpl_id))
    new_doc_id = int(handle.read().decode())
    handle.close()
    return new_doc_id
def proxy_get(url):
    """GET `url` through a hard-coded HTTP proxy and, on a 200 response,
    print the raw body and save it to a.html.
    """
    opener = build_opener(HTTPHandler(),
                          HTTPCookieProcessor(CookieJar()),
                          ProxyHandler(proxies={'http': '39.137.69.7:8080'}))
    resp = opener.open(url)
    if resp.code == 200:
        # FIX: renamed from `bytes`, which shadowed the builtin type.
        payload = resp.read()
        print(payload)
        with open('a.html', 'wb') as f:
            f.write(payload)
def proxy_get(url):
    """GET `url` through a hard-coded HTTPS proxy (10 s timeout) and, on a
    200 response, save the raw body to baidu_ip.html.
    """
    opener = build_opener(
        HTTPHandler(), HTTPCookieProcessor(CookieJar()),
        ProxyHandler(proxies={'https': '113.119.38.80:3128'}))
    resp = opener.open(url, timeout=10)
    if resp.code == 200:
        # FIX: renamed from `bytes`, which shadowed the builtin type.
        payload = resp.read()
        # print(payload.decode('utf-8'))
        with open('baidu_ip.html', 'wb') as f:
            f.write(payload)
def remove_documents(doc_ids):
    """Ask the download site to delete every document in `doc_ids`."""
    mainlog.debug("Deleting document {} from server".format(str(doc_ids)))
    opener = build_opener(HTTPHandler(), HTTPSHandler())
    for document_id in doc_ids:
        mainlog.debug("Deleting document {} from server".format(document_id))
        target = configuration.get(
            "DownloadSite",
            "base_url") + "/remove_file?file_id={}".format(document_id)
        opener.open(target)
def _sendData(self, url, dataDict=None):
    """Best-effort POST of `dataDict` (form-encoded) to `url`.

    On success, touches the uuid file's atime/mtime to record the ping.
    All failures (network, encoding, filesystem) are deliberately swallowed.
    """
    try:
        # then connect to webserver a send json
        # set debuglevel=0 for no messages
        opener = build_opener(HTTPHandler(debuglevel=0))
        data = urlencode(dataDict).encode()
        content = opener.open(url, data=data).read()
        # Mark the uuid file as freshly used (atime and mtime = now).
        now = time.time()
        os.utime(self._getUuidFileName(), (now, now))
    except Exception as e:
        # Intentional silent swallow: this is a fire-and-forget ping.
        pass
def download(url):
    """GET `url` with a desktop browser User-Agent (HTTP proxying explicitly
    disabled) and, on a 200 response, hand the decoded body to parse().
    """
    opener = build_opener(HTTPHandler(), ProxyHandler(proxies={'http': ''}))
    req = Request(url, headers={
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
    })
    resp = opener.open(req)
    if resp.code == 200:
        # FIX: renamed from `bytes`, which shadowed the builtin type.
        raw = resp.read()
        datas = raw.decode('utf-8')
        if datas:
            parse(datas)
class TestClient(object):
    """Minimal JSON-over-HTTP client for exercising a local test server.

    Each Scope represents a uniquely-named sub-path on the server and can be
    used as a (no-op cleanup) context manager.
    """

    def __init__(self, host, port, timeout=5):
        self.host = host
        self.port = port
        self.timeout = timeout
        opener_director = OpenerDirector()
        opener_director.add_handler(HTTPHandler())
        # BUG FIX: the opener was a discarded local; send_request reads
        # self.opener_director and raised AttributeError.
        self.opener_director = opener_director

    def send_request(self, url, headers=None, arguments=None):
        """POST `arguments` (JSON-encoded unless already bytes) to `url`;
        return the decoded JSON response. Raises HTTPError on non-200.
        """
        # FIX: was a shared mutable default (headers={}).
        if headers is None:
            headers = {}
        if not isinstance(arguments, bytes):
            arguments = bytes(json.dumps(arguments), 'ascii')
        request = Request(method='POST', url=url, headers=headers,
                          data=arguments)
        with self.opener_director.open(request,
                                       timeout=self.timeout) as response:
            if response.status != 200:
                raise HTTPError(url, response.status, response.msg,
                                response.headers, None)
            return json.loads(str(response.read(), 'ascii'))

    class Scope(object):
        """A randomly-named namespace ('{host}:{port}/{id}') on the harness."""

        def __init__(self, harness):
            self.harness = harness
            self.id = uuid.uuid4().hex

        def __enter__(self):
            return self

        def __exit__(self, type, value, traceback):
            pass

        def __repr__(self):
            return '{}({})'.format(type(self).__name__, repr(self.id))

        def send_request(self, arguments=None):
            """POST `arguments` as JSON to this scope's URL."""
            return self.harness.send_request(
                url='http://{}:{}/{}'.format(self.harness.host,
                                             self.harness.port, self.id),
                headers={
                    'Accept': 'application/json',
                    'Content-Type': 'application/json'
                },
                arguments=arguments)

        def url(self, path=''):
            """Return 'http://host:port/<id>.<path>' for this scope."""
            return 'http://{}:{}/{}.{}'.format(self.harness.host,
                                               self.harness.port, self.id,
                                               path)

    def scope(self):
        """Create a fresh uniquely-named Scope bound to this client."""
        return TestClient.Scope(self)
def setup(self, user=None, apikey=None): if user == None: user = self.user apikey = self.apikey if user == None: user, apikey = self.getcredentials() self.opener = build_opener(HTTPSHandler(),HTTPHandler()) if self.verbose: print(('%s:%s'%(user, apikey)).encode('utf8'),file=sys.stderr) self.basicauthhdr = "Basic %s"%(base64.b64encode(('%s:%s'%(user, apikey)).encode('utf8')),) if self.verbose: print(self.basicauthhdr,file=sys.stderr)
def get(url, proxies):
    """GET `url` through the supplied `proxies` mapping and, on a 200
    response, print the external IPv4 addresses scraped from the page.
    """
    opener = build_opener(HTTPHandler(), ProxyHandler(proxies))
    request = Request(url, headers=header.get_headers())
    resp = opener.open(request)
    if resp.code == 200:
        print('--请求成功--')
        page_text = resp.read().decode()
        # Pull every dotted-quad IP out of the page's /ipv4/... anchors.
        ip_pattern = r"<a href='/ipv4/\d+\.\d+\.\d+\.\d+'>(\d+\.\d+\.\d+\.\d+)</a>"
        s = re.findall(ip_pattern, page_text)
        print('当前主机的IP: ', s)
def get_asf_file(url, output_path, chunk_size=16*1024):  # 16 KiB default (FIX: comment previously said "8 kb")
    """Stream `url` to `output_path` in `chunk_size` chunks, authenticating
    via the module-level `cookie_jar`.
    """
    context = {}
    opener = build_opener(HTTPCookieProcessor(cookie_jar), HTTPHandler(),
                          HTTPSHandler(**context))
    request = Request(url)
    response = opener.open(request)
    # FIX: close the HTTP response even if a read/write fails.
    try:
        with open(output_path, "wb") as f:
            while True:
                chunk = response.read(chunk_size)
                if not chunk:
                    break
                f.write(chunk)
    finally:
        response.close()
def post_info(url, **kwargs):
    """POST `kwargs` form-encoded to `url`; return the open response object."""
    # opener = build_opener()
    opener = OpenerDirector()
    opener.add_handler(HTTPHandler())
    # BUG FIX: Request's data must be bytes on Python 3; urlencode returns
    # str, which made urllib raise TypeError when sending the body.
    form_data = urlencode(kwargs).encode('ascii')
    req = Request(
        url,
        data=form_data,
        headers={
            'Content-Type': 'application/x-www-form-urlencoded',
        }
    )
    return opener.open(req)
def requestPage(self, address=""): req = Request(address, None, self.headers) #Create a CookieJar object to hold the cookies #Create an opener to open pages using the http protocol and to process cookies. opener = build_opener(HTTPCookieProcessor(self.cj), HTTPHandler()) if address.startswith("https"): ssl._create_default_https_context = ssl._create_unverified_context #Query the website and return the html to the variable 'page' response = opener.open(req) return response, self.cj
def get_cookie(url):
    """Fetch `url` while capturing its cookies into an LWP jar, then dump
    the collected cookies.
    """
    jar = http.cookiejar.LWPCookieJar()
    opener = build_opener(HTTPHandler(),
                          HTTPSHandler(),
                          HTTPCookieProcessor(jar))
    fetch(opener, url)
    dump(jar)
def recon(title, cookiejar, count, start):
    """Query LinkedIn's Voyager blended search for people at `companyid`
    (module global), optionally filtered by job `title`; return the raw
    JSON response bytes.

    `count`/`start` drive pagination. Uses module globals ssl_validation,
    companyid, user_agent, timeout.
    """
    # ssl_validation truthy -> stock handlers; otherwise relaxed SSL context.
    if (ssl_validation):
        opener = build_opener(HTTPCookieProcessor(cookiejar), HTTPHandler())
    else:
        opener = build_opener(HTTPCookieProcessor(cookiejar), HTTPHandler(),
                              HTTPSHandler(context=sslvalidation()))
    if (title is None):
        # Unfiltered people search within the selected company.
        query = "count=" + str(
            count
        ) + "&filters=List(currentCompany-%3E" + str(
            companyid
        ) + ",resultType-%3EPEOPLE" + ")&origin=FACETED_SEARCH&q=all&queryContext=List(spellCorrectionEnabled-%3" + "Etrue,relatedSearchesEnabled-%3Etrue,kcardTypes-%3ECOMPANY%7CJOB_TITLE)&start=" + str(
            start)
    else:
        # Same query plus a title facet; urlencode({title: None}).split("=")[0]
        # is used purely to percent-encode the title text.
        query = "count=" + str(
            count
        ) + "&filters=List(currentCompany-%3E" + str(
            companyid
        ) + ",resultType-%3EPEOPLE,title-%3E" + urlencode({
            str(title): None
        }).split(
            "="
        )[0] + ")&origin=FACETED_SEARCH&q=all&queryContext=List(spellCorrectionEnabled-%3" + "Etrue,relatedSearchesEnabled-%3Etrue,kcardTypes-%3ECOMPANY%7CJOB_TITLE)&start=" + str(
            start)
    headers = {
        "Host": "www.linkedin.com",
        "User-Agent": user_agent,
        "Accept": "application/vnd.linkedin.normalized+json+2.1",
        "x-restli-protocol-version": "2.0.0",
        "Cookie": getcookie(cookiejar),
        "Csrf-Token": ajaxtoken(cookiejar),
    }
    req = Request(
        "https://www.linkedin.com/voyager/api/search/blended?" + query,
        None, headers)
    f = opener.open(req, timeout=timeout)
    return f.read()
def get_jd():
    """Fetch http://jd.com via an explicitly-built opener and print the
    UTF-8 decoded body on a 200 response.
    """
    url = 'http://jd.com'
    request = Request(url)
    # Build an opener (acts as the "browser"); additional handlers could be
    # chained into build_opener if needed.
    httphandler = HTTPHandler()
    opener = build_opener(httphandler)
    # Issue the request through the opener.
    resp = opener.open(request)
    if resp.code == 200:
        # FIX: renamed from `bytes`, which shadowed the builtin type.
        body = resp.read()
        print(body.decode('utf-8'))
def check_cookie(self, file_check='https://urs.earthdata.nasa.gov/profile'):
    '''
    Validate cookie before we begin

    Returns
    -------
    bool
        Whether cookie valid.
    '''
    if self.cookie_jar is None:
        # BUG FIX: was `self.errMsg` — every other path in this method uses
        # `self.errorMsg`, so callers reading errorMsg saw stale text here.
        self.errorMsg = "No cookie jar"
        return False
    # File we know is valid, used to validate cookie
    # Apply custom Redirect Handler
    opener = build_opener(HTTPCookieProcessor(self.cookie_jar),
                          HTTPHandler(),
                          HTTPSHandler(**self.context))
    install_opener(opener)
    # Attempt a HEAD request
    request = Request(file_check)
    request.get_method = lambda: 'HEAD'
    try:
        response = urlopen(request, timeout=30)
        resp_code = response.getcode()
        # Make sure we're logged in
        if not self.check_cookie_is_logged_in(self.cookie_jar):
            self.errorMsg = 'Not logged in, try again'
            return False
        # Save cookiejar
        self.cookie_jar.save(self.cookie_jar_path)
        # For security make user accessible only
        os.chmod(self.cookie_jar_path, 0o600)
    except HTTPError:
        # If we get this error, again, it likely means the user has not
        # agreed to current EULA
        self.errorMsg = "\nIMPORTANT: User appears to lack permissions" \
            " to download data from the Earthdata Datapool." \
            "\n\nNew users must have an account at " \
            "Earthdata https://urs.earthdata.nasa.gov"
        return False
    # These return codes indicate the USER has not been approved to
    # download the data
    if resp_code in (300, 301, 302, 303):
        self.errorMsg = \
            f"Redirect ({resp_code}) occured, invalid cookie value!"
        return False
    # These are successes!
    if resp_code in (200, 307):
        self.errorMsg = ''
        return True
    return False
def check_cookie(self):
    """Validate the current Earthdata cookie with a HEAD request.

    Returns True when the cookie is accepted (HTTP 200/307), False when it
    is missing/invalid. NOTE(review): calls exit(-1) on an HTTPError rather
    than returning — terminates the whole process.
    """
    if self.cookie_jar is None:
        print(" > Cookiejar is bunk: {0}".format(self.cookie_jar))
        return False
    # File we know is valid, used to validate cookie
    file_check = 'https://urs.earthdata.nasa.gov/profile'
    # Apply custom Redirect Handler
    opener = build_opener(HTTPCookieProcessor(self.cookie_jar),
                          HTTPHandler(),
                          HTTPSHandler(**self.context))
    install_opener(opener)
    # Attempt a HEAD request
    request = Request(file_check)
    request.get_method = lambda: 'HEAD'
    try:
        print(" > attempting to download {0}".format(file_check))
        response = urlopen(request, timeout=30)
        resp_code = response.getcode()
        # Make sure we're logged in
        if not self.check_cookie_is_logged_in(self.cookie_jar):
            return False
        # Save cookiejar
        self.cookie_jar.save(self.cookie_jar_path)
    except HTTPError:
        # If we get this error, again, it likely means the user has not agreed to current EULA
        print("\nIMPORTANT: ")
        print(
            "User appears to lack permissions to download data from the Earthdata Datapool."
        )
        print(
            "\n\nNew users: you must first have an account at Earthdata https://urs.earthdata.nasa.gov"
        )
        exit(-1)
    # This return codes indicate the USER has not been approved to download the data
    if resp_code in (300, 301, 302, 303):
        print("Redirect ({0}) occured, invalid cookie value!".format(
            resp_code))
        return False
    # These are successes!
    if resp_code in (200, 307):
        return True
    return False
def get_jd():
    """Fetch http://www.jd.com/ via an explicitly-built opener and print the
    UTF-8 decoded body on a 200 response.
    """
    url = 'http://www.jd.com/'
    req = Request(url)
    # Build the opener object explicitly (instead of using urlopen directly).
    httphandler = HTTPHandler()
    opener = build_opener(httphandler)
    resp = opener.open(req)
    if resp.code == 200:
        # FIX: renamed from `bytes`, which shadowed the builtin type.
        body = resp.read()
        print(body.decode('utf-8'))
def __init__(self,
             opener=None,
             retry_times=4,
             executor=_g_executor,
             timeout=4,
             debug=False):
    """Initialise client state; build a debug-aware urllib opener when the
    caller did not supply one.

    Parameters: opener (preconfigured urllib opener or None), retry_times,
    executor (task executor), timeout (seconds), debug (enable urllib
    HTTP debug output).
    """
    # BUG FIX: `debug` was accepted but ignored (self._DEBUG = False), so
    # the debuglevel below could never be enabled. Mirrors the sibling
    # writing-aware constructor, which stores the parameter.
    self._DEBUG = debug
    self._MIN_TASKS_FOR_CONCURRENT = 2
    self._opener = opener
    self._languages = None
    self._TIMEOUT = timeout
    if not self._opener:
        debuglevel = self._DEBUG and 1 or 0
        self._opener = build_opener(
            HTTPHandler(debuglevel=debuglevel),
            HTTPSHandler(debuglevel=debuglevel))
    self._RETRY_TIMES = retry_times
    self._executor = executor
def __init__(self, *args, **kwargs):
    """Remember the constructor arguments (self.args / self.kw) and run the
    stock HTTPHandler initialisation.

    NOTE(review): calls HTTPHandler.__init__ directly rather than super();
    kept as-is since the enclosing class hierarchy is not visible here.
    """
    self.args = args
    self.kw = kwargs
    HTTPHandler.__init__(self)