def get_stock_html(ticker_name):
    """Fetch the Yahoo Finance quote page HTML for *ticker_name*.

    Returns the raw page body as a string.
    """
    # Opener that follows redirects over plain HTTP.
    opener = urllib2.build_opener(urllib2.HTTPRedirectHandler(),
                                  urllib2.HTTPHandler())
    # Some websites like to block automated agents, so to be on the safe
    # side we set the User-agent header to look like a real browser --
    # here Internet Explorer 7 running on Windows XP.  Other user agent
    # strings can be found by searching for "user agent strings".
    opener.addheaders = [('User-agent',
                          "Mozilla/4.0 (compatible; MSIE 7.0; "
                          "Windows NT 5.1; .NET CLR 2.0.50727; "
                          ".NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)")]
    # Read web page with opener.
    url = "http://finance.yahoo.com/q?s=" + ticker_name
    response = opener.open(url)
    # read() returns the whole body directly; joining readlines() built a
    # pointless intermediate list.
    return response.read()
def test_cookie_redirect(self):
    """Cookies must not leak into requests re-issued after a redirect
    to a different host."""
    from cookielib import CookieJar
    from test.test_cookielib import interact_netscape

    cj = CookieJar()
    # Plant a cookie scoped to example.com only.
    interact_netscape(cj, "http://www.example.com/", "spam=eggs")
    hh = MockHTTPHandler(302, "Location: http://www.cracker.com/\r\n\r\n")
    hdeh = urllib2.HTTPDefaultErrorHandler()
    hrh = urllib2.HTTPRedirectHandler()
    cp = urllib2.HTTPCookieProcessor(cj)
    o = build_test_opener(hh, hdeh, hrh, cp)
    o.open("http://www.example.com/")
    # The redirected request (to cracker.com) must not carry the cookie.
    # assertFalse replaces the deprecated assert_ alias.
    self.assertFalse(hh.req.has_header("Cookie"))
def __init__(self):
    """Class initialisation: creates the cookie jar, the handlers and
    the opener used for all subsequent requests."""
    # Nothing fetched and no error recorded yet.
    self.lastpage = None
    self.lasterror = None
    # Cookie jar shared by every request made through self.opener.
    self.cj = CookieJar()
    self.cookieH = urllib2.HTTPCookieProcessor(self.cj)
    self.redirectH = urllib2.HTTPRedirectHandler()
    # No proxy configured by default.
    self.proxyH = None
    self.opener = urllib2.build_opener(self.cookieH, self.redirectH)
def __init__(
        self,
        proxy=None,
        user_agent='Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:69.0) Gecko/20100101 Firefox/69.0'
):
    """Build (and globally install) an opener with redirect support,
    browser-like headers and an optional proxy."""
    self.redirect_handler = urllib2.HTTPRedirectHandler()
    self.http_handler = urllib2.HTTPHandler()
    self.opener = urllib2.build_opener(self.http_handler,
                                       self.redirect_handler)
    # Attach a proxy handler only when one was requested.
    if proxy:
        self.proxy_handler = urllib2.ProxyHandler(proxy)
        self.opener.add_handler(self.proxy_handler)
    self.opener.addheaders = [('User-agent', user_agent),
                              ('Referer', 'https://pkk.rosreestr.ru/')]
    # Make this opener the process-wide default for urllib2.urlopen().
    urllib2.install_opener(self.opener)
def download_for_month(self, year, month, symbol, destination_directory):
    """Download one month of data for *symbol* into *destination_directory*.

    Tries the primary download URL first and falls back to the secondary
    one on any error.  Returns the path of the written file.

    Raises:
        OSError: if the destination file already exists.
    """
    filename = self.get_downloaded_filename(month, symbol, year)
    filename_with_directory = destination_directory + filename
    if os.path.isfile(filename_with_directory):
        raise OSError(
            errno.EEXIST,
            "File '" + filename_with_directory + "' already exists",
            filename_with_directory)
    # Opener that follows redirects and replays the login cookies.
    opener = urllib2.build_opener(
        urllib2.HTTPRedirectHandler(),
        urllib2.HTTPHandler(debuglevel=0),
        urllib2.HTTPSHandler(debuglevel=0),
        urllib2.HTTPCookieProcessor(self.login_response_cookies))
    opener.addheaders += [
        ("Referer", self.url_provider.get_download_referrer_url(year, month))
    ]

    def fetch(url_to_download):
        # Single download attempt; 'with' guarantees the file handle is
        # closed even if the network read fails part-way.
        print("Downloading '%s' to '%s'" % (url_to_download,
                                            filename_with_directory))
        response = opener.open(url_to_download)
        with open(filename_with_directory, "wb") as f:
            f.write(response.read())

    try:
        fetch(self.url_provider.get_download_url1(year, month, symbol))
    except Exception:
        # Primary URL failed -- retry with the fallback URL (narrowed
        # from bare 'except:' so KeyboardInterrupt/SystemExit still
        # propagate).  A failure there propagates to the caller.
        fetch(self.url_provider.get_download_url2(year, month, symbol))
    return filename_with_directory
def update(self, workdir): if not os.path.exists(workdir): os.makedirs(workdir) purl = urlparse(self.url) self._logger.info('Downloading %s', self.url) try: hdlrs = [urllib2.HTTPRedirectHandler()] if purl.scheme == 'https': hdlrs.append(urllib2.HTTPSHandler()) opener = urllib2.build_opener(*hdlrs) resp = opener.open(self.url) except urllib2.URLError, e: raise SourceError('Downloading %s failed. %s' % (self.url, e))
def get_stock_html(ticker_name): opener = urllib2.build_opener( urllib2.HTTPRedirectHandler(), urllib2.HTTPHandler(debuglevel=0), ) opener.addheaders = [('User-agent', "Mozilla/4.0 (compatible; MSIE 7.0; " "Windows NT 5.1; .NET CLR 2.0.50727; " ".NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)")] url = "http://finance.yahoo.com/q?s=" + ticker_name # url = "http://127.0.0.1:82/index.php?page=robots-txt.php" response = opener.open(url) html = response.read() soup = BeautifulSoup(html) quote = soup.find('div', attrs={'class': 'yfi_quote_summary'}) print quote
def __init__(self, *args, **kargs):
    """Initialise the director with the standard set of handlers."""
    urllib2.OpenerDirector.__init__(self, *args, **kargs)
    # Basic protocol support.
    for handler in (urllib2.ProxyHandler(),
                    urllib2.UnknownHandler(),
                    urllib2.HTTPHandler(),
                    urllib2.HTTPDefaultErrorHandler(),
                    urllib2.HTTPRedirectHandler(),
                    urllib2.FTPHandler(),
                    urllib2.FileHandler(),
                    urllib2.HTTPErrorProcessor()):
        self.add_handler(handler)
    # Add cookie support.  (Not strictly needed right now, but one never
    # knows when it might become necessary.)
    self.cj = cookielib.CookieJar()
    self.add_handler(urllib2.HTTPCookieProcessor(self.cj))
def get_opener():
    """Return a urllib2 opener with cookie, redirect and HTTP(S)
    support, reloading saved cookies from disk when available."""
    # Restore previously saved cookies if the cookie file exists.
    if os.access(COOKIE, os.F_OK):
        cj.load(ignore_discard=True)
    opener = urllib2.build_opener(
        urllib2.HTTPRedirectHandler(),
        urllib2.HTTPHandler(debuglevel=0),
        urllib2.HTTPSHandler(debuglevel=0),
        urllib2.HTTPCookieProcessor(cj))
    # Pose as an old IE browser rather than a script.
    opener.addheaders = [
        ('User-agent', ('Mozilla/4.0 (compatible; MSIE 6.0; '
                        'Windows NT 5.2; .NET CLR 1.1.4322)'))
    ]
    return opener
def Login(uname, passwd): url = 'https://wse1.webcorp.org.uk/login/seredirect.php' values = {'username': uname, 'password': passwd} data = urllib.urlencode(values) cookie = cookielib.CookieJar() opener = urllib2.build_opener(urllib2.HTTPRedirectHandler(), urllib2.HTTPHandler(debuglevel=0), urllib2.HTTPSHandler(debuglevel=0), urllib2.HTTPCookieProcessor(cookie)) opener.addheaders = [('User-agent', ('Mozilla/4.0 (compatible; MSIE 6.0; ' 'Windows NT 5.2; .NET CLR 1.1.4322)')) ] print '\nLogging in as %s' % uname opener.open(url, data) print '\nDone' return cookie
def __init__(self, username, password): # url for website we want to log in to self.base_url = 'http://baseurl.com' # login action we want to post data to # could be /login or /account/login or something similar self.login_action = '/account/login.php' # file for storing cookies self.cookie_file = 'login.cookies' # user provided username and password self.username = username self.password = password # set up a cookie jar to store cookies self.cj = cookielib.MozillaCookieJar(self.cookie_file) # set up opener to handle cookies, redirects etc self.opener = urllib2.build_opener( urllib2.HTTPRedirectHandler(), urllib2.HTTPHandler(debuglevel=0), urllib2.HTTPSHandler(debuglevel=0), urllib2.HTTPCookieProcessor(self.cj) ) # pretend we're a web browser and not a python script self.opener.addheaders = [('User-agent', ('Mozilla/4.0 (compatible; MSIE 6.0; ' 'Windows NT 5.2; .NET CLR 1.1.4322)')) ] # open the front page of the website to set and save initial cookies response = self.opener.open(self.base_url) self.cj.save() # try and log in to the site response = self.login() #print response.read() data = urllib.urlencode({ 'fieldName1' : 'fieldValue1', 'fieldName2' : 'fieldValue2', 'btnSubmit' : "submit" }) response = self.opener.open("http://baseurl.com/func.php", data) print response.read()
def getSateConnection(loginUrl, valueDict, headers=None):
    '''Returns the response and statedOpener for login Url with data'''
    # The opener maintains cookies across the login exchange.
    cookies = cookielib.CookieJar()
    opener = urllib2.build_opener(
        urllib2.HTTPRedirectHandler(),
        urllib2.HTTPHandler(debuglevel=0),
        urllib2.HTTPSHandler(debuglevel=0),
        urllib2.HTTPCookieProcessor(cookies))
    # Custom headers are only applied when given as a list of pairs.
    if isinstance(headers, list):
        opener.addheaders = headers
    response = opener.open(loginUrl, urllib.urlencode(valueDict))
    return response, opener
def __init__(self):
    """Init login URL, credentials, cookies, and URL opener."""
    # Login endpoint and credential mapping taken from settings.
    self.loginurl = settings.LOGIN_URL
    self.credentials = {
        settings.USERNAME_LABEL: settings.USERNAME,
        settings.PASSWORD_LABEL: settings.PASSWORD
    }
    # Cookie jar shared by all requests through self.opener.
    self.cookies = cookielib.CookieJar()
    self.opener = urllib2.build_opener(
        urllib2.HTTPRedirectHandler(),
        urllib2.HTTPHandler(),
        urllib2.HTTPSHandler(),
        urllib2.HTTPCookieProcessor(self.cookies))
def __init__(self, token, episode_id, filename=''):
    """Look up an episode via the API, either by filename, IMDb id
    ('tt...') or internal episode id.

    On success sets is_found = True plus the episode attributes; on any
    failure (network error, bad JSON, result == "KO") sets
    is_found = False.
    """
    self.token = token
    self.episode_id = episode_id
    self.filename = filename
    # Build the query string for whichever lookup mode applies.
    if len(self.filename) > 0:
        self.action = 'episode?access_token=%s&filename=%s' % (
            self.token, self.filename)
    elif self.episode_id.startswith('tt'):
        self.action = 'episode?access_token=%s&imdb_id=%s' % (
            self.token, self.episode_id)
    else:
        self.action = 'episode?access_token=%s&episode_id=%s' % (
            self.token, self.episode_id)
    self.cj = cookielib.CookieJar()
    self.opener = urllib2.build_opener(
        urllib2.HTTPRedirectHandler(),
        urllib2.HTTPHandler(debuglevel=0),
        urllib2.HTTPSHandler(debuglevel=0),
        urllib2.HTTPCookieProcessor(self.cj))
    self.opener.addheaders = [('User-agent',
                               'Lynx/2.8.1pre.9 libwww-FM/2.14')]
    self.opener.get_method = lambda: 'GET'
    request_url = "%s%s" % (request_uri, self.action)
    log('FindEpisode request_url=%s' % request_url)
    try:
        response = self.opener.open(request_url, None)
        # read() replaces ''.join(readlines()) - same bytes, no list.
        data = json.loads(response.read())
        log('FindEpisode response=%s' % data)
    except Exception:
        # Narrowed from a bare 'except:' so KeyboardInterrupt/SystemExit
        # propagate; any request/parse error means "not found".
        data = None
    if (data is None) or (data['result'] == "KO"):
        self.is_found = False
    else:
        self.is_found = True
        self.resultdata = data['result']
        self.showname = data['episode']['show']['name']
        self.episodename = data['episode']['name']
        self.season_number = data['episode']['season_number']
        self.number = data['episode']['number']
        self.id = data['episode']['id']
def __init__(self, base_url):
    """Create a cookie-persisting opener, fetch the site front page to
    seed the cookies, and save them to disk."""
    self.bu = base_url
    self.cookie_file = 'login.cookies'
    self.cj = cookielib.MozillaCookieJar(self.cookie_file)
    self.opener = urllib2.build_opener(
        urllib2.HTTPRedirectHandler(),
        urllib2.HTTPHandler(debuglevel=0),
        urllib2.HTTPSHandler(debuglevel=0),
        urllib2.HTTPCookieProcessor(self.cj))
    # Pretend we're a web browser and not a python script.
    self.opener.addheaders = [
        ('User-agent', (
            'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.4'
        )),
        ('Connection', ('keep alive')),
        ('Content-Type', ('application/x-www-form-urlencoded'))]
    # Open the front page of the website to set and save initial cookies.
    self.response = self.opener.open(self.bu)
    self.cj.save()
def login(self):
    """Authenticate against the GeoNetwork XML login service and keep
    the session cookie in self.opener."""
    url = "%ssrv/en/xml.user.login" % self.base
    headers = {
        "Content-Type": "application/x-www-form-urlencoded",
        "Accept": "text/plain"
    }
    body = urllib.urlencode({
        "username": self.user,
        "password": self.password
    })
    request = urllib2.Request(url, body, headers)
    # The cookie processor keeps the session alive for later calls.
    self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(),
                                       urllib2.HTTPRedirectHandler())
    response = self.opener.open(request)
    doc = etree.fromstring(response.read())
    assert doc.tag == 'ok', "GeoNetwork login failed!"
    self.connected = True
def __init__(self, login, password):
    """Start up: build a cookie-aware opener and log in to Facebook."""
    self.login = login
    self.password = password
    self.cj = cookielib.CookieJar()
    self.opener = urllib2.build_opener(
        urllib2.HTTPRedirectHandler(),
        urllib2.HTTPHandler(debuglevel=0),
        urllib2.HTTPSHandler(debuglevel=0),
        urllib2.HTTPCookieProcessor(self.cj))
    self.opener.addheaders = [
        ('User-agent', ('Mozilla/4.0 (compatible; MSIE 6.0; '
                        'Windows NT 5.2; .NET CLR 1.1.4322)'))]
    # Need this twice - once to set cookies, once to log in...
    self.loginToFacebook()
    self.loginToFacebook()
def handleRedirect(baseurl, parsedkeyword, viewItems):
    """Resolve the redirect target of a search URL.

    Follows the page's JavaScript `var url = "...";` redirect when
    present; on any urllib2 failure falls back to letting `requests`
    follow HTTP redirects.  Returns the final URL (or the original URL
    when the page has no result).
    """
    # Assemble the search URL.
    url = baseurl + "/search=" + parsedkeyword + "?" + viewItems
    req = urllib2.Request(url)
    redirect_handler = urllib2.HTTPRedirectHandler()
    cookie_handler = urllib2.HTTPCookieProcessor()
    opener = urllib2.build_opener(redirect_handler, cookie_handler)
    try:
        page = opener.open(req).read()
        rawurl = re.findall(r"var url = \"(.+?)\";", page)
        if rawurl == []:
            # No JS redirect found: that's a no-result page.
            return url
        return baseurl + rawurl[0].replace("\\", "")
    except Exception:
        # Narrowed from a bare 'except:' so SystemExit/KeyboardInterrupt
        # still propagate.  Let requests chase the redirects instead.
        r = requests.get(url, allow_redirects=True)
        return r.url
def get_stock_html(ticker_name):
    """Return the Yahoo Finance quote page HTML for *ticker_name*."""
    # Create an opener that follows redirects over plain HTTP.
    opener = urllib2.build_opener(
        urllib2.HTTPRedirectHandler(),
        urllib2.HTTPHandler(debuglevel=0),
    )
    # Add headers to request; pretending to be IE7 on Windows XP so the
    # site does not reject an obvious script.
    opener.addheaders = [('User-agent', "Mozilla/4.0 (compatible; MSIE 7.0; "
                          "Windows NT 5.1; .NET CLR 2.0.50727; "
                          ".NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)")]
    # Read page with opener.
    url = "http://finance.yahoo.com/q?s=" + ticker_name
    response = opener.open(url)
    # read() returns the whole body; joining readlines() built a
    # pointless intermediate list.
    return response.read()
def auth(email, password):
    """Log in to m.vk.com; returns True on success, False on bad login.

    Saves cookies to VKCookie unless two-step auth was required.
    """

    def auth_user(email, password, opener):
        # Step 1: fetch the login form, fill in the credentials, submit.
        doc = opener.open("https://m.vk.com").read()
        parser = FormParser()
        parser.feed(doc)
        parser.close()
        bad_form = (not parser.form_parsed or parser.url is None
                    or "pass" not in parser.params
                    or "email" not in parser.params)
        if bad_form:
            raise RuntimeError("Something wrong")
        parser.params["email"] = email
        parser.params["pass"] = password
        if parser.method != "POST":
            raise NotImplementedError("Method '%s'" % parser.method)
        response = opener.open(parser.url, urllib.urlencode(parser.params))
        return response.read(), response.geturl()

    def sms_code(doc, opener):
        # 2nd step of authentification: submit the SMS confirmation code.
        parser = FormParser()
        parser.feed(doc)
        parser.close()
        auth_code = int(xbmcgui.Dialog().numeric(0, 'Auth code:'))
        parser.params["code"] = auth_code
        if parser.method != "POST":
            raise NotImplementedError("Method '%s'" % parser.method)
        response = opener.open("https://m.vk.com" + parser.url,
                               urllib.urlencode(parser.params))
        return response.read(), response.geturl()

    cj = cookielib.LWPCookieJar()
    opener = urllib2.build_opener(
        urllib2.HTTPCookieProcessor(cj),
        urllib2.HTTPRedirectHandler())
    doc, url = auth_user(email, password, opener)
    # Check login success.
    if "Login failed" in doc:
        return False
    # Two-step auth sends us back to a /login page for the code.
    if "/login" in urlparse(url).path:
        doc, url = sms_code(doc, opener)
    else:
        cj.save(VKCookie)
    return True
def createOpener(self, headers=None, handler=None, proxyHandler=None):
    """
    Create opener for fetching data.

    headers = [] Ex. User-agent etc like, [('User-Agent', HEADERS), ....]
    handler = object Ex. Handler like cookie_jar, auth handler etc.
    return opener
    """
    opener = urllib2.build_opener(
        urllib2.HTTPRedirectHandler(),
        urllib2.HTTPHandler(debuglevel=0),
        urllib2.HTTPSHandler(debuglevel=0))
    # Optional extras are attached only when supplied.
    if headers is not None:
        opener.addheaders = headers
    if handler is not None:
        opener.add_handler(handler)
    if proxyHandler is not None:
        opener.add_handler(proxyHandler)
    return opener
def __init__(self, create_key=None, result_key=None):
    """Initialise the client, reading API keys from ~/.atlas/auth when
    they are not supplied explicitly.

    The auth file holds one or two whitespace-separated keys: the
    create key and, optionally, a separate result key.
    """
    if not create_key:
        # 'open' replaces the deprecated 'file' builtin.
        with open('%s/.atlas/auth' % os.path.expanduser('~')) as f:
            keys_l = f.read().strip().split()
            create_key = keys_l[0]
            if not result_key and len(keys_l) > 1:
                result_key = keys_l[1]
    self.create_key = create_key
    # Fall back to the create key when no result key is available.
    if not result_key:
        result_key = create_key
    self.result_key = result_key
    redirect_handler = urllib2.HTTPRedirectHandler()
    cookie_handler = urllib2.HTTPCookieProcessor()
    self.opener = urllib2.build_opener(redirect_handler, cookie_handler)
def __init__(self, user_id='0', tv_auth_token='0'):
    '''
    Creates the TV API interface.

    The user identified by the supplied user id must have a TV profile
    created by a call to QuizduellApi.create_tv_user() and a personal
    TV auth token.

    @param user_id: Quizduell user id
    @type user_id: str
    @param tv_auth_token: TV auth token returned by
        QuizduellApi.create_tv_user()
    @type tv_auth_token: str
    '''
    self._user_id = user_id
    self._tv_auth_token = tv_auth_token
    # Redirect-following opener for both HTTP and HTTPS.
    self._opener = urllib2.build_opener(
        urllib2.HTTPRedirectHandler(),
        urllib2.HTTPHandler(debuglevel=0),
        urllib2.HTTPSHandler(debuglevel=0))
def __init__(self, config=None):
    """Initialise from *config* (falling back to a fresh Configuration)
    and restore cookies from the configured cookie file when present."""
    self.__config = Configuration() if config is None else config
    cookie_path = self.__config.get('script', 'cookie_file')
    self.__cookieJar = cookielib.MozillaCookieJar(cookie_path)
    # Pick up cookies left behind by a previous run.
    if os.path.exists(cookie_path):
        self.__cookieJar.load()
    self.__opener = urllib2.build_opener(
        urllib2.HTTPRedirectHandler(),
        urllib2.HTTPHandler(debuglevel=0),
        urllib2.HTTPCookieProcessor(self.__cookieJar))
    self.__opener.addheaders = [
        ("User-Agent", self.__config.get('script', 'user_agent'))]
def populateWord(login_cookie, word, corp, span=20): 'Function to populate the search query' url = 'http://wse1.webcorp.org.uk/cgi-bin/' + corp + '/types.cgi?from_index=from_index&lang=english&qf=&q=' + word + '&dom=any&sent=0&f=&f_type=0&concord=Get+Concordances&min_freq=&show_num=' opener = urllib2.build_opener(urllib2.HTTPRedirectHandler(), urllib2.HTTPHandler(debuglevel=0), urllib2.HTTPSHandler(debuglevel=0), urllib2.HTTPCookieProcessor(login_cookie)) opener.addheaders = [('User-agent', ('Mozilla/4.0 (compatible; MSIE 6.0; ' 'Windows NT 5.2; .NET CLR 1.1.4322)')) ] print '\n[%s][%s][span%s][search population]Start' % (word, corp, span) opener.open(url, timeout=10) if corp == 'BLOG': url = 'http://wse1.webcorp.org.uk/cgi-bin/' + corp + '/search.cgi?q=' + word + ',english-any,0,0,0-0-0' else: url = 'http://wse1.webcorp.org.uk/cgi-bin/' + corp + '/search.cgi?q=' + word + ',any,0,0,0-0-0' opener.open(url, timeout=10) print '\n[%s][%s][span%s][search population]Done' % (word, corp, span)
def __init__(self, source=None, username='', password=''):
    '''Source must be `espn` or `premierleague`.'''
    self.source = source
    self.username = username
    self.password = password
    # Per-instance cache for fetched data.
    self._cache = {}
    self.cookiejar = cookielib.CookieJar()
    self.opener = urllib2.build_opener(
        urllib2.HTTPRedirectHandler(),
        urllib2.HTTPHandler(debuglevel=0),
        urllib2.HTTPSHandler(debuglevel=0),
        urllib2.HTTPCookieProcessor(self.cookiejar),
        urllib2.ProxyHandler())  # Auto-detect proxies
    self.opener.addheaders = [('User-agent', 'Mozilla/5.0')]
def fetchNetData(self, weekOrYear=True):
    """Fetch the sensor readings (temperature, humidity, luminosity)
    from the meteoroleg web service into self.l.

    Logs in with self.username / self.password via the Spring security
    endpoint, then downloads the three report streams.  (The original
    docstring documented nonexistent username/password parameters --
    they are instance attributes.)

    Keyword arguments:
    weekOrYear --- True if gathering Yearly data, False for weekly data.
    """
    url = 'http://meteoroleg.upc.es/dexserver/j_spring_security_check'
    # Query ids differ between the yearly and weekly reports.
    if weekOrYear:  # Yearly
        urlTemp = 'http://meteoroleg.upc.es/dexserver/report-results.htm?6578706f7274=1&d-49653-e=1&queryId=83'
        urlHum = 'http://meteoroleg.upc.es/dexserver/report-results.htm?6578706f7274=1&d-49653-e=1&queryId=84'
        urlLum = 'http://meteoroleg.upc.es/dexserver/report-results.htm?6578706f7274=1&d-49653-e=1&queryId=85'
    else:  # weekly
        urlTemp = 'http://meteoroleg.upc.es/dexserver/report-results.htm?6578706f7274=1&d-49653-e=1&queryId=87'
        urlHum = 'http://meteoroleg.upc.es/dexserver/report-results.htm?6578706f7274=1&d-49653-e=1&queryId=89'
        urlLum = 'http://meteoroleg.upc.es/dexserver/report-results.htm?6578706f7274=1&d-49653-e=1&queryId=91'
    login = {'j_username': self.username, 'j_password': self.password}
    headers = {'User-Agent': 'Mozilla/5.0 (X11; U; Linux i686; en-US)'}
    loginFormData = urllib.urlencode(login)
    req = urllib2.Request(url, loginFormData, headers)
    resp = urllib2.urlopen(req)
    # Capture the session cookie issued by the login response.
    cookies = CookieJar()
    cookies.extract_cookies(resp, req)
    cookie_handler = urllib2.HTTPCookieProcessor(cookies)
    redirect_handler = urllib2.HTTPRedirectHandler()
    opener = urllib2.build_opener(redirect_handler, cookie_handler)
    # Making the initial connection for the login.
    opener.open(req)
    reqTemp = urllib2.Request(urlTemp, headers=headers)
    reqHum = urllib2.Request(urlHum, headers=headers)
    reqLum = urllib2.Request(urlLum, headers=headers)
    log(self.logger, logging.INFO, 'Fetching temperature data from %s', urlTemp)
    respTemp = opener.open(reqTemp)
    log(self.logger, logging.INFO, 'Fetching humidity data from %s', urlHum)
    respHum = opener.open(reqHum)
    log(self.logger, logging.INFO, 'Fetching luminosity data from %s', urlLum)
    respLum = opener.open(reqLum)
    # Responses are file-like; iterating them appends their lines to the
    # shared reading list.
    self.l.extend(respTemp)
    self.l.extend(respHum)
    self.l.extend(respLum)
def SendRequest(url, method=None, data=None):
    """Sends a HTTP request to the WebDriver server.

    Return values and exceptions raised are the same as those of
    |urllib2.urlopen|.

    Arguments:
      url: The full URL to send the request to.
      method: The HTTP request method to use; defaults to 'GET'.
      data: The data to send with the request as a string. Defaults to
          None and is ignored if |method| is not 'POST' or 'PUT'.

    Returns:
      A file-like object.
    """
    request = Request(url, method=method, data=data)
    # The WebDriver wire protocol speaks JSON.
    request.add_header('Accept', 'application/json')
    redirect_opener = urllib2.build_opener(urllib2.HTTPRedirectHandler())
    return redirect_opener.open(request)
def __init__(self):
    """Set up the addon, restore saved cookies if present, and build a
    browser-like opener."""
    self.__addon__ = xbmcaddon.Addon(id="service.subtitles.torec")
    self.__addonname__ = self.__addon__.getAddonInfo('id')
    dataroot = xbmc.translatePath(
        'special://profile/addon_data/%s' % self.__addonname__).decode('utf-8')
    cookie = os.path.join(dataroot, "torec.cookie")
    self.cj = cookielib.LWPCookieJar(cookie)
    # Restore cookies saved by a previous session.
    if os.path.exists(cookie):
        self.cj.load(ignore_discard=True)
    self.opener = urllib2.build_opener(
        urllib2.HTTPRedirectHandler(),
        urllib2.HTTPHandler(),
        urllib2.HTTPCookieProcessor(self.cj))
    self.opener.addheaders = [(
        'User-agent',
        ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'),
    )]
def __init__(self):
    """Open the output/log CSV files and build a cookie-persisting,
    browser-like opener (cookies reloaded from disk when present).

    Note: removed an unused 'global DATA' declaration -- DATA was never
    read or assigned inside this method.
    """
    self.output_file = open("output.csv", 'w')
    self.log_file = open("log.csv", 'w')
    self.cookie_filename = "parser.cookies.txt"
    # Simulate browser behaviour with session cookies.
    self.cj = cookielib.MozillaCookieJar(self.cookie_filename)
    if os.access(self.cookie_filename, os.F_OK):
        self.cj.load()
    self.opener = urllib2.build_opener(
        urllib2.HTTPRedirectHandler(),
        urllib2.HTTPHandler(debuglevel=0),
        urllib2.HTTPSHandler(debuglevel=0),
        urllib2.HTTPCookieProcessor(self.cj)
    )
    self.opener.addheaders = [
        ('User-agent', (
            "Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Firefox/31.0"
        ))
    ]