def getDLurl(self, url):
    try:
        content = self.getUrl(url)
        match = re.findall('flashvars.playlist = \'(.*?)\';', content)
        if match:
            for url in match:
                url = 'http://ua.canna.to/canna/' + url
                content = self.getUrl(url)
                match = re.findall('<location>(.*?)</location>', content)
                if match:
                    for url in match:
                        req = mechanize.Request('http://ua.canna.to/canna/single.php')
                        response = mechanize.urlopen(req)
                        url = 'http://ua.canna.to/canna/' + url
                        req = mechanize.Request(url)
                        req.add_header('User-Agent', canna_agent)
                        response = mechanize.urlopen(req)
                        response.close()
                        code = response.info().getheader('Content-Location')
                        url = 'http://ua.canna.to/canna/avzt/' + code
                        return url
    except urllib2.HTTPError, error:
        printl(error, self, "E")
        message = self.session.open(MessageBoxExt, (_("Error: %s") % error),
                                    MessageBoxExt.TYPE_INFO, timeout=3)
        return False
def test_gzip(self):
    p = HTTPGzipProcessor()
    url = "https://www.example.com/"
    req = p.https_request(mechanize.Request(url))
    self.assertIsNone(req.get_header('Accept-Encoding'))
    p.request_gzip = True
    req = p.https_request(mechanize.Request(url))
    self.assertEqual(req.get_header('Accept-Encoding'), 'gzip')
    req = mechanize.Request(url)
    req.add_header('Accept-Encoding', 'moo, *')
    req = p.https_request(req)
    self.assertEqual(req.get_header('Accept-Encoding'), 'moo, *, gzip')
    data = os.urandom(1024 * 1024)
    cdata = b''.join(compress_readable_output(BytesIO(data)))
    r = MockResponse(
        url,
        data=cdata,
        info={
            'Content-Encoding': 'gzip',
            'Content-Length': str(len(cdata))
        })
    r = p.https_response(req, r)
    self.assertEqual(r.read(), data)
    h = r.info()
    self.assertFalse(h.getheaders('content-encoding'))
    self.assertFalse(h.getheaders('content-length'))
def getDLurl(self, url):
    try:
        content = self.getUrl(url)
        match = re.findall('flashvars.playlist = \'(.*?)\';', content)
        if match:
            for url in match:
                url = 'http://ua.canna.to/canna/' + url
                content = self.getUrl(url)
                match = re.findall('<location>(.*?)</location>', content)
                if match:
                    for url in match:
                        url = 'http://ua.canna.to/canna/' + url
                        req = mechanize.Request(
                            'http://ua.canna.to/canna/single.php')
                        response = mechanize.urlopen(req)
                        req = mechanize.Request(url)
                        req.add_header(
                            'User-Agent',
                            ' Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3'
                        )
                        response = mechanize.urlopen(req)
                        response.close()
                        code = response.info().getheader('Content-Location')
                        url = 'http://ua.canna.to/canna/avzt/' + code
                        print url
                        return url
    except urllib2.HTTPError, error:
        printl(error, self, "E")
        message = self.session.open(MessageBox, ("Fehler: %s" % error),
                                    MessageBox.TYPE_INFO, timeout=3)
        return False
def ieee_get_csv(keyword):
    import urllib
    import mechanize
    # Keyword processing for ieee
    keyword = keyword.replace('"', '.QT.')
    # Crawl intelligent browser
    br = crawler_browser()
    # Fake request to mimic normal user
    # TODO: Play with their analytics requests too for an even more realistic request.
    URL = 'http://ieeexplore.ieee.org/search/searchresult.jsp?queryText=' + urllib.quote_plus(
        keyword) + '&newsearch=true'
    fake = br.open(URL)
    # Search request as browser
    br.set_header('Referer', URL)
    data = '{"queryText":"' + keyword + '","newsearch":"true"}'
    search = br.open(
        mechanize.Request('http://ieeexplore.ieee.org/rest/search',
                          data=data,
                          headers={"Content-type": "application/json"}))
    # Export as csv request
    params = {'bulkSetSize': 2000}
    data = urllib.urlencode(params)
    csv_request = br.open(
        mechanize.Request('http://ieeexplore.ieee.org/search/searchExport.jsp',
                          data=data))
    csv_data = csv_request.read()
    return csv_data
def __callRequest(self):
    cookieJar = mechanize.LWPCookieJar()
    try:  # TODO: possibly without try
        cookieJar.load(self._cookiePath, self.__bIgnoreDiscard,
                       self.__bIgnoreExpired)
    except Exception as e:
        logger.info(e)

    sParameters = urllib.urlencode(self.__aParameters)

    opener = mechanize.build_opener(SmartRedirectHandler,
                                    mechanize.HTTPEquivProcessor,
                                    mechanize.HTTPRefreshProcessor)

    if (len(sParameters) > 0):
        oRequest = mechanize.Request(self.__sUrl, sParameters)
    else:
        oRequest = mechanize.Request(self.__sUrl)

    for aHeader in self.__aHeaderEntries:
        for sHeaderKey, sHeaderValue in aHeader.items():
            oRequest.add_header(sHeaderKey, sHeaderValue)
    cookieJar.add_cookie_header(oRequest)

    if self.caching and self.cacheTime > 0:
        sContent = self.readCache(self.getRequestUri())
        if sContent:
            return sContent
    try:
        oResponse = opener.open(oRequest, timeout=60)
    except mechanize.HTTPError, e:
        if not self.ignoreErrors:
            xbmcgui.Dialog().ok('xStream', 'Fehler beim Abrufen der Url:',
                                self.__sUrl, str(e))
            logger.error("HTTPError " + str(e) + " Url: " + self.__sUrl)
            return ''
        else:
            oResponse = e
def rtnHTMLformat(tmpddGenrcgenPresent, sppPrefx, pthwcod, ouPthwpng):
    inpx = '\n'.join(tmpddGenrcgenPresent)  # inpx = "ALDH2 color \nALDH3A1 color"
    request = mechanize.Request(
        "http://www.genome.jp/kegg/tool/map_pathway2.html")
    response = mechanize.urlopen(request)
    forms = mechanize.ParseResponse(response, backwards_compat=False)
    form = forms[0]
    form["unclassified"] = inpx
    form["org"] = sppPrefx
    request2 = form.click()
    response2 = mechanize.urlopen(request2)
    a = str(response2.read()).split('href="/kegg-bin/show_pathway?')[1]
    code = a.split('/')[0]  # response2.read()
    request = mechanize.Request(
        "http://www.genome.jp/kegg-bin/show_pathway?%s/%s.args" % (code, pthwcod))
    # request = mechanize.Request("http://www.genome.jp/kegg-bin/show_pathway?%s/%s.args" % ('13171478854246', 'hsa00410'))
    response = mechanize.urlopen(request)
    forms = mechanize.ParseResponse(response, backwards_compat=False)
    form = forms[1]
    status = ' NOT '
    try:
        imgf = str(forms[1]).split('/mark_pathway')[1].split('/')[0]
        os.system("wget --quiet http://www.genome.jp/tmp/mark_pathway%s/%s.png -O %s"
                  % (imgf, pthwcod, ouPthwpng))
        status = ' '
    except:
        pass
    return 'A pathway image was%ssuccessfully produced...' % status
def login(self):
    headers = {
        'User-Agent':
        'Mozilla/5.0 (X11; Linux x86_64; rv:81.0) Gecko/20100101 Firefox/81.0',
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.5',
        'Referer': 'https://www.coned.com/',
        'Content-Type': 'application/json',
        'Origin': 'https://www.coned.com'
    }
    data = json.dumps({
        "LoginEmail": self.user,
        "LoginPassword": self.password,
        "LoginRememberMe": False,
        "ReturnUrl": "",
        "OpenIdRelayState": ""
    })
    request = mechanize.Request(
        "https://www.coned.com/sitecore/api/ssc/ConEd-Cms-Services-Controllers-Okta/User/0/Login",
        data, headers)
    self.browser.open(request)

    totp = pyotp.TOTP(self.totp)
    thing = json.dumps({
        "MFACode": totp.now(),
        "ReturnUrl": "",
        "OpenIdRelayState": ""
    })
    request = mechanize.Request(
        'https://www.coned.com/sitecore/api/ssc/ConEd-Cms-Services-Controllers-Okta/User/0/VerifyFactor',
        thing, headers)
    response = self.browser.open(request)
    redirect_url = json.loads(response.read())["authRedirectUrl"]
    response = self.browser.open(redirect_url)
def __get_csv(self, letter='a', now=False):
    # open the url
    current_url = self.overview_url + '1111&b=' + letter
    overview_req = mechanize.Request(current_url)
    overview_res = mechanize.urlopen(overview_req)

    # find the list of entries to post
    py_query = PyQuery(overview_res.read())
    titlelist = py_query("input[name='titelnrliste']").val()

    # create the post request
    post_data = {
        'url': current_url,
        'download': '[Download]',
        'titelnrliste': titlelist
    }

    if (now):
        # find the checked box (the current quartal)
        default_quartal = py_query(".quartal input:checked").attr('name')
        post_data[str(default_quartal)] = 'ON'
    else:
        # enable all quartal's checkboxes
        quartals = [1, 2, 3, 4]
        for i in quartals:
            if i in range(1, 5):
                post_data[str(self.year) + str(i)] = 'ON'

    # send the post request
    csv_req = mechanize.Request(current_url, urllib.urlencode(post_data))
    csv_res = mechanize.urlopen(csv_req)
    self.csv_parser.process_result(response=csv_res)
def __callRequest(self):
    sParameters = urllib.urlencode(self.__aParamaters)
    if (self.__cType == cRequestHandler.REQUEST_TYPE_GET):
        if (len(sParameters) > 0):
            if (self.__sUrl.find('?') == -1):
                self.__sUrl = self.__sUrl + '?' + str(sParameters)
                sParameters = ''
            else:
                self.__sUrl = self.__sUrl + '&' + str(sParameters)
                sParameters = ''

    if (len(sParameters) > 0):
        oRequest = mechanize.Request(self.__sUrl, sParameters)
    else:
        oRequest = mechanize.Request(self.__sUrl)

    for aHeader in self.__aHeaderEntries:
        for sHeaderKey, sHeaderValue in aHeader.items():
            oRequest.add_header(sHeaderKey, sHeaderValue)
    try:
        oResponse = mechanize.urlopen(oRequest)
    except urllib2.HTTPError, e:
        xbmcgui.Dialog().ok('xStream', 'Fehler beim Abrufen der Url:',
                            self.__sUrl, str(e))
        logger.error("HTTPError " + str(e) + " Url: " + self.__sUrl)
        return ''
def fanboxGetPostsFromArtist(self, artist_id, next_url=""):
    '''
    get all posts from the supported user
    from https://www.pixiv.net/ajax/fanbox/creator?userId=15521131
    '''
    if next_url is None or next_url == "":
        url = "https://www.pixiv.net/ajax/fanbox/creator?userId={0}".format(
            artist_id)
    elif next_url.startswith("https://"):
        url = next_url
    else:
        url = "https://www.pixiv.net" + next_url

    # Fix #494
    PixivHelper.print_and_log('info', 'Getting posts from ' + url)
    referer = "https://www.pixiv.net/fanbox/creator/{0}".format(artist_id)

    req = mechanize.Request(url)
    req.add_header('Accept', 'application/json, text/plain, */*')
    req.add_header('Referer', referer)
    req.add_header('Origin', 'https://www.pixiv.net')
    req.add_header('User-Agent', self._config.useragent)

    res = self.open_with_retry(req)
    response = res.read()
    PixivHelper.get_logger().debug(response.decode('utf8'))
    res.close()

    # Issue #420
    _tzInfo = None
    if self._config.useLocalTimezone:
        _tzInfo = PixivHelper.LocalUTCOffsetTimezone()

    result = FanboxArtist(artist_id, response, tzInfo=_tzInfo)

    pixivArtist = PixivArtist(artist_id)
    self.getMemberInfoWhitecube(artist_id, pixivArtist)
    result.artistName = pixivArtist.artistName
    result.artistToken = pixivArtist.artistToken

    for post in result.posts:
        # https://fanbox.pixiv.net/api/post.info?postId=279561
        # https://www.pixiv.net/fanbox/creator/104409/post/279561
        p_url = "https://fanbox.pixiv.net/api/post.info?postId={0}".format(
            post.imageId)
        p_referer = "https://www.pixiv.net/fanbox/creator/{0}/post/{1}".format(
            artist_id, post.imageId)
        PixivHelper.get_logger().debug('Getting post detail from %s', p_url)

        p_req = mechanize.Request(p_url)
        p_req.add_header('Accept', 'application/json, text/plain, */*')
        p_req.add_header('Referer', p_referer)
        p_req.add_header('Origin', 'https://www.pixiv.net')
        p_req.add_header('User-Agent', self._config.useragent)

        p_res = self.open_with_retry(p_req)
        p_response = p_res.read()
        PixivHelper.get_logger().debug(p_response.decode('utf8'))
        p_res.close()
        js = demjson.decode(p_response)
        post.parsePost(js["body"])

    return result
def resolve(self, url, cookie_jar, user_agent):
    headers = {'User-agent': user_agent, 'Referer': url}

    try:
        cookie_jar.load(ignore_discard=True)
    except Exception as e:
        logger.info(e)

    opener = mechanize.build_opener(
        mechanize.HTTPCookieProcessor(cookie_jar))

    request = mechanize.Request(url)
    for key in headers:
        request.add_header(key, headers[key])
    try:
        response = opener.open(request)
    except mechanize.HTTPError as e:
        response = e
    body = response.read()

    cookie_jar.extract_cookies(response, request)
    cookie_helper.check_cookies(cookie_jar)

    parsed_url = urlparse(url)
    submit_url = "%s://%s/cdn-cgi/l/chk_jschl" % (parsed_url.scheme,
                                                  parsed_url.netloc)

    params = {}
    try:
        params["jschl_vc"] = re.search(r'name="jschl_vc" value="(\w+)"',
                                       body).group(1)
        params["pass"] = re.search(r'name="pass" value="(.+?)"', body).group(1)
        js = self._extract_js(body)
    except mechanize.HTTPError as e:
        return None

    params["jschl_answer"] = str(js + len(parsed_url.netloc))

    sParameters = urllib.urlencode(params, True)
    request = mechanize.Request("%s?%s" % (submit_url, sParameters))
    for key in headers:
        request.add_header(key, headers[key])

    sleep(5)

    try:
        response = opener.open(request)
    except mechanize.HTTPError as e:
        response = e
    return response
def __callRequest(self):
    if self.caching and self.cacheTime > 0:
        sContent = self.readCache(self.getRequestUri())
        if sContent:
            return sContent

    cookieJar = mechanize.LWPCookieJar(filename=self._cookiePath)
    try:  # TODO: possibly without try
        cookieJar.load(ignore_discard=self.__bIgnoreDiscard,
                       ignore_expires=self.__bIgnoreExpired)
    except Exception as e:
        logger.info(e)

    sParameters = urllib.urlencode(self.__aParameters, True)

    handlers = [
        SmartRedirectHandler, mechanize.HTTPEquivProcessor,
        mechanize.HTTPRefreshProcessor
    ]
    if sys.version_info >= (2, 7, 9) and sys.version_info < (2, 7, 11):
        handlers.append(newHTTPSHandler)
    opener = mechanize.build_opener(*handlers)

    if (len(sParameters) > 0):
        oRequest = mechanize.Request(self.__sUrl, sParameters)
    else:
        oRequest = mechanize.Request(self.__sUrl)

    for key, value in self.__headerEntries.items():
        oRequest.add_header(key, value)
    cookieJar.add_cookie_header(oRequest)

    user_agent = self.__headerEntries.get(
        'User-Agent',
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; de-DE; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3'
    )

    try:
        oResponse = opener.open(oRequest, timeout=self.requestTimeout)
    except mechanize.HTTPError, e:
        if e.code == 503 and e.headers.get("Server") == 'cloudflare-nginx':
            html = e.read()
            oResponse = self.__check_protection(html, user_agent, cookieJar)
            if not oResponse:
                logger.error("Failed to get CF-Cookie for Url: " + self.__sUrl)
                return ''
        elif not self.ignoreErrors:
            xbmcgui.Dialog().ok('xStream', 'Fehler beim Abrufen der Url:',
                                self.__sUrl, str(e))
            logger.error("HTTPError " + str(e) + " Url: " + self.__sUrl)
            return ''
        else:
            oResponse = e
def test_set_handled_schemes(self):
    class MockHandlerClass(make_mock_handler()):
        def __call__(self):
            return self

    class BlahHandlerClass(MockHandlerClass):
        pass

    class BlahProcessorClass(MockHandlerClass):
        pass

    BlahHandler = BlahHandlerClass([("blah_open", None)])
    BlahProcessor = BlahProcessorClass([("blah_request", None)])

    class TestUserAgent(mechanize.UserAgent):
        default_schemes = ["http"]
        default_others = []
        default_features = []
        handler_classes = mechanize.UserAgent.handler_classes.copy()
        handler_classes.update({
            "blah": BlahHandler,
            "_blah": BlahProcessor
        })

    ua = TestUserAgent()

    self.assertEqual(list(h.__class__.__name__ for h in ua.handlers),
                     ["HTTPHandler"])
    ua.set_handled_schemes(["http", "file"])
    self.assertEqual(sorted(h.__class__.__name__ for h in ua.handlers),
                     ["FileHandler", "HTTPHandler"])
    self.assertRaises(ValueError, ua.set_handled_schemes,
                      ["blah", "non-existent"])
    self.assertRaises(ValueError, ua.set_handled_schemes,
                      ["blah", "_blah"])
    ua.set_handled_schemes(["blah"])

    req = mechanize.Request("blah://example.com/")
    ua.open(req)
    exp_calls = [("blah_open", (req, ), {})]
    assert len(ua.calls) == len(exp_calls)
    for got, expect in zip(ua.calls, exp_calls):
        self.assertEqual(expect, got[1:])

    ua.calls = []
    req = mechanize.Request("blah://example.com/")
    ua._set_handler("_blah", True)
    ua.open(req)
    exp_calls = [("blah_request", (req, ), {}),
                 ("blah_open", (req, ), {})]
    assert len(ua.calls) == len(exp_calls)
    for got, expect in zip(ua.calls, exp_calls):
        self.assertEqual(expect, got[1:])
    ua._set_handler("_blah", True)
def login(self, username, password):
    try:
        PixivHelper.print_and_log('info', 'Logging in...')
        # url = "https://accounts.pixiv.net/login"

        # get the post key
        # res = self.open_with_retry(url)
        # parsed = BeautifulSoup(res, features="html5lib")
        # js_init_config = self._getInitConfig(parsed)

        data = {}
        data['pixiv_id'] = username
        data['password'] = password
        data['captcha'] = ''
        data['g_recaptcha_response'] = ''
        data['return_to'] = 'https://www.pixiv.net'
        data['lang'] = 'en'
        # data['post_key'] = js_init_config["pixivAccount.postKey"]
        data['source'] = "accounts"
        data['ref'] = ''

        request = mechanize.Request("https://accounts.pixiv.net/api/login?lang=en",
                                    urllib.parse.urlencode(data))
        response = self.open_with_retry(request)
        return self.processLoginResult(response, username, password)
    except BaseException:
        traceback.print_exc()
        PixivHelper.print_and_log('error', 'Error at login(): {0}'.format(sys.exc_info()))
        raise
def get_vorlage(session_id, url):
    try:
        response = mechanize.urlopen(mechanize.Request(url))
        pprint.pprint(response)
    except URLError:
        return
    forms = mechanize.ParseResponse(response, backwards_compat=False)
    for form in forms:
        # All forms are iterated. Might not all be attachment-related.
        for control in form.controls:
            if control.name == 'DT':
                print control.name, control.value
                request2 = form.click()
                try:
                    response2 = mechanize.urlopen(request2)
                    form_url = response2.geturl()
                    if "getfile.asp" in form_url:
                        # print "ERFOLG:", response2.info()
                        pdf = response2.read()
                        md5 = hashlib.md5(pdf).hexdigest()
                        scraperwiki.sqlite.save(
                            unique_keys=['session_id', 'dt', 'md5', 'size'],
                            data={
                                'session_id': session_id,
                                'dt': control.value,
                                'md5': md5,
                                'size': len(pdf)
                            })
                        continue
                except mechanize.HTTPError, response2:
                    print "HTTP-FEHLER :("
                except URLError:
                    pass
def RA_do_search(request):
    campings = pd.DataFrame()
    searchString1 = 'currentPage='
    searchString2 = '&paging=true&facilityType=all&agencyKey=&facilityAvailable=show_all&viewType=view_list&selectedLetter=ALL&owner=&hiddenFilters=false'
    r = prep_header_req(request)
    soup = BeautifulSoup(r.read(), "html.parser")
    # print soup
    pages_str = soup.find_all(
        "div", {"class": "usearch_results_label"})[0].contents[0].encode('ascii')
    m = re.match(r"Search Results: (\d+)-(\d+) of (\d+)", pages_str)
    pages = int(m.group(3)) / (int(m.group(2)) - int(m.group(1)) + 1)
    campings = collect_data(soup, campings)
    for page in range(1, pages):
        searchResultURL = r.geturl() + '?' + searchString1 + str(
            page) + searchString2
        req2 = mechanize.Request(searchResultURL)
        r2 = prep_header_req(req2)
        soup = BeautifulSoup(r2.read(), "html.parser")
        print page + 1,
        # f = open('/Users/hillenr/tmp/sample_mech.html', 'w')
        # f.write(r2.read())
        # f.close()
        campings = collect_data(soup, campings)
    print
    return campings
def api_call(self, apiUrl, apiMethod='GET', apiBody=''):
    br = self._get_browser()
    if not self.logged_in:
        self._login()
    cookiejar = br.cookiejar
    ajaxkey = None
    for cookie in cookiejar:
        if 'afg' == cookie.name:
            ajaxkey = cookie.value
    log.debug("ajaxkey is %s", ajaxkey)
    apiCall = mechanize.Request('https://www.alarm.com/web/api/' + apiUrl,
                                data=apiBody, method=apiMethod)
    apiCall.add_header('ajaxrequestuniquekey', ajaxkey)
    apiCall.add_header('Accept', 'application/vnd.api+json')
    apiCall.add_header('Content-Type', 'application/json; charset=UTF-8')
    result = None
    try:
        response = br.open(apiCall)
        content = response.read()
        log.debug("Post command JSON is %s", content)
        result = json.loads(content)
        log.debug(result)
    except:
        e = sys.exc_info()[0]
        log.debug("got an error %s", e)
    return result
def set_video_metadata(self, video):
    # The player html code with all the required information is loaded
    # after the main page using javascript and a special XmlHttpRequest
    # we emulate this behaviour
    from_request = self.group_dict['from']
    query = urllib.urlencode({
        'from_request': from_request,
        'request': '/video/%s?get_video=1' % video.id
    })
    request = mechanize.Request(KidsVideoPage.CONTROLLER_PAGE % query)
    # This header is mandatory to have the correct answer from dailymotion
    request.add_header('X-Requested-With', 'XMLHttpRequest')
    player_html = self.browser.readurl(request)

    try:
        m = re.search('<param name="flashvars" value="(?P<flashvars>.*?)"',
                      player_html)
        flashvars = urlparse.parse_qs(m.group('flashvars'))
        info = json.loads(flashvars['sequence'][0])

        # The video parameters seem to be always located at the same place
        # in the structure: ['sequence'][0]['layerList'][0]['sequenceList']
        # [0]['layerList'][0]['param']['extraParams'])
        #
        # but to be more tolerant to future changes in the structure, we
        # prefer to look for the parameters everywhere in the structure
        def find_video_params(data):
            if isinstance(data, dict):
                if 'param' in data and 'extraParams' in data['param']:
                    return data['param']['extraParams']
                data = data.values()
            if not isinstance(data, list):
                return None
            for item in data:
                ret = find_video_params(item)
                if ret:
                    return ret
            return None

        params = find_video_params(info['sequence'])

        video.title = unicode(params['videoTitle'])
        video.author = unicode(params['videoOwnerLogin'])
        video.description = unicode(params['videoDescription'])
        video.thumbnail = BaseImage(params['videoPreviewURL'])
        video.thumbnail.url = unicode(params['videoPreviewURL'])
        video.duration = datetime.timedelta(seconds=params['mediaDuration'])
    except:
        # If anything goes wrong, we prefer to return normally, this will
        # allow video download to work even if we don't have the metadata
        pass
def support(x_cord, y_cord, browser):
    branfrage = mechanize.Request(
        "http://de101.die-staemme.de/game.php?village=5512&screen=place")
    response = browser.open(branfrage)
    forms = ParseResponse(response)
    form = forms[0]
    # print form
    control = form.find_control(name="support", type="submit")
    # print control.name, control.value, control.type
    form["sword"] = "180"
    form["x"] = str(x_cord)
    form["y"] = str(y_cord)
    oeffnen = form.click(control.name)
    antwort = browser.open(oeffnen)
    forms2 = ParseResponse(antwort)
    form2 = forms2[0]
    # print form2
    control2 = form2.find_control(type="submit")
    # print control2.name, control2.value, control2.type
    oeffnen = form2.click(control2.type)
    browser.open(oeffnen)
def load_more_elements_to_process(browser, used_asins, offset):
    """
    Emulates the addNextBook Javascript function called when the user
    approaches the bottom of the Kindle highlights page

    This function generates HTML on the backend (why is it being built on
    the backend???), then sends it to the frontend which will drop it into
    the DOM

    We hit the same endpoint to get the new piece of HTML that should be
    inserted, then pull out the new highlight tags with Beautiful Soup

    This is necessary because not all books are shown on pageload

    Return - triple of (new BeautifulSoup tags loaded, ASIN of new book,
    new offset to use)
    """
    params = {
        "current_offset": offset,
        "used_asins[]": used_asins,
        "upcoming_asins[]": ""  # Unused, as far as I can tell
    }
    encoded_params = urllib.urlencode(params, True)  # Amazon uses the doseq style
    request = mechanize.Request(KINDLE_HIGHLIGHTS_URL + "/next_book?" +
                                encoded_params)
    request.add_header("Referer", KINDLE_HIGHLIGHTS_URL)
    response = browser.open(request)
    response_data = response.get_data()
    if len(response_data.strip()) == 0:
        return ([], used_asins, offset)  # No more books

    soup = BeautifulSoup(response.read())
    """
    def filter_func(tag):
        tag_classes = tag["class"]
        return tag.name == "div" and (BOOK_DIV_CLASS in tag_classes or
                                      HIGHLIGHT_DIV_CLASS in tag_classes)
    """
    new_elements = soup.select(
        "> div")  # Get top-level divs which will be the nodes we want
    new_book_tag = soup.select("div." + BOOK_DIV_CLASS)[0]
    new_book_asin, new_offset = new_book_tag["id"].split("_")
    return (new_elements, new_book_asin, new_offset)
def _scrapeUrl(self, url):
    """scrape a generic url """
    # grab the data -- go internets!
    request3 = mechanize.Request(url)
    self.cj.add_cookie_header(request3)
    response3 = mechanize.urlopen(request3)
    maincontent = response3.read()

    # make the soup
    soup = BeautifulSoup(maincontent)

    # parse the soup
    # This thing is a beast
    # date/times and games are interspersed
    # The first thing should be a date
    # then all games following are on that date
    # So - we find all dates and games with our query and handle them
    # as they happen in order
    date = None
    tags = soup.findAll(**{'class': ["schedules-list-date",
                                     'schedules-list-hd pre',
                                     'schedules-list-hd post']})
    print "found %s tags" % len(tags)
    for tag in tags:
        # we got a date!
        if tag['class'] == 'schedules-list-date':
            # we've found a new date
            gameDateStr = str(tag.find('span').text)
            monthStr, date = gameDateStr.split(',')[1].strip().split()
            monthNum = self.MONTH_MAP[str(monthStr)]
            if monthNum in (1, 2):
                year = self.year + 1
            else:
                year = self.year
            dateInt = int(''.join([x for x in date if x.isdigit()]))
            date = datetime.date(year, monthNum, dateInt)
        else:
            # we've got a new game - parse out home and away team
            home = str(tag.find(**{'class': ['team-name home ',
                                             'team-name home lost']}).text)
            away = str(tag.find(**{'class': ['team-name away ',
                                             'team-name away lost']}).text)
            # need to get the time as well
            time = str(tag.find(**{'class': 'time'}).text)
            if time == 'FINAL':
                print "CANNOT GET VALID TIME FOR GAMES that are in the past"
                hr = 0
                minute = 0
            else:
                hr, minute = time.split(':')
                amPm = str(tag.find(**{'class': ['am', 'pm']}).text).strip()
                hr = int(hr)
                minute = int(minute)
                # adjust times to take into account am/pm
                if amPm == "PM" and hr < 12:
                    hr += 12
                if amPm == "AM" and hr == 12:
                    hr = 0
            d = {'week': self.week,
                 'home': self.TEAM_MAP[home],
                 'away': self.TEAM_MAP[away],
                 'kickoff': datetime.datetime(date.year, date.month, date.day,
                                              hr, minute,
                                              tzinfo=self.EASTERN_TIME_ZONE)}
            self.games.append(d)
def test_sending_headers(self):
    handler = self._make_request_handler([(200, [], "we don't care")])
    req = mechanize.Request("http://localhost:%s/" % handler.port,
                            headers={'Range': 'bytes=20-39'})
    mechanize.urlopen(req)
    self.assertEqual(handler.received_headers['Range'], 'bytes=20-39')
def getUrl(self, url):
    req = mechanize.Request(url)
    req.add_header('User-Agent', canna_agent)
    response = mechanize.urlopen(req)
    link = response.read()
    response.close()
    return link
def getPixivPage(self, url, referer="https://www.pixiv.net", returnParsed=True, enable_cache=True):
    ''' get page from pixiv and return as parsed BeautifulSoup object or response object.

    throw PixivException as server error
    '''
    url = self.fixUrl(url)
    while True:
        req = mechanize.Request(url)
        req.add_header('Referer', referer)

        read_page = self._get_from_cache(url)
        if read_page is None:
            try:
                temp = self.open_with_retry(req)
                read_page = temp.read()
                read_page = read_page.decode('utf8')
                if enable_cache:
                    self._put_to_cache(url, read_page)
                temp.close()
            except urllib.error.HTTPError as ex:
                if ex.code in [403, 404, 503]:
                    read_page = ex.read()
                    raise PixivException("Failed to get page: {0} => {1}".format(url, ex),
                                         errorCode=PixivException.SERVER_ERROR)
                else:
                    PixivHelper.print_and_log(
                        'error', 'Error at getPixivPage(): {0}'.format(str(sys.exc_info())))
                    raise PixivException("Failed to get page: {0}".format(url),
                                         errorCode=PixivException.SERVER_ERROR)

        if returnParsed:
            parsedPage = BeautifulSoup(read_page, features="html5lib")
            return parsedPage
        return read_page
def mechanize_cookie(config, log):
    """Returns a new Intel Ingress cookie via mechanize."""
    import mechanize
    log.info("Logging into Facebook using mechanize")
    browser = mechanize.Browser()
    if log.level <= 10:
        browser.set_debug_http(True)
        browser.set_debug_responses(True)
        browser.set_debug_redirects(True)
    browser.set_handle_robots(False)
    cookies = mechanize.CookieJar()
    browser.set_cookiejar(cookies)
    browser.addheaders = [(
        'User-agent',
        'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.7 (KHTML, like Gecko) Chrome/7.0.517.41 Safari/534.7'
    )]
    browser.set_handle_refresh(False)
    log.info("Everything set - Let's go")

    url = 'https://www.facebook.com/v3.2/dialog/oauth?client_id=449856365443419&redirect_uri=https%3A%2F%2Fintel.ingress.com%2F'
    browser.open(url)
    log.info("Opened Facebook Login Page")
    log.debug(browser.geturl())

    # sometimes you have to fill in the form multiple times for whatever reason
    tries = 0
    while "https://intel.ingress.com/" not in browser.geturl() and tries < 5:
        tries += 1
        log.info(f"Trying to log into Intel: Try {tries}/5")
        browser.select_form(nr=0)
        try:
            browser.form['email'] = config.ingress_user
            browser.form['pass'] = config.ingress_password
        except:
            pass
        response = browser.submit()
        time.sleep(2)
        log.debug(browser.geturl())

    if "https://intel.ingress.com/" in response.geturl() and response.getcode() == 200:
        log.info("Got through. Now getting that cookie")
        log.debug(browser.geturl())
        # this is magic
        req = mechanize.Request(browser.geturl())
        cookie_list = browser._ua_handlers['_cookies'].cookiejar.make_cookies(
            response, req)
        final_cookie = _write_cookie(log, {c.name: c.value for c in cookie_list})
        return final_cookie
    else:
        log.error("Failed to login into Intel")
        log.info(browser.geturl())
        return ""
def fanboxGetPostsFromArtist(self, artist_id, next_url=""):
    '''
    get all posts from the supported user
    from https://www.pixiv.net/ajax/fanbox/creator?userId=15521131
    '''
    if next_url is None or next_url == "":
        url = "https://www.pixiv.net/ajax/fanbox/creator?userId={0}".format(
            artist_id)
    elif next_url.startswith("https://"):
        url = next_url
    else:
        url = "https://www.pixiv.net" + next_url

    # Fix #494
    PixivHelper.print_and_log('info', 'Getting posts from ' + url)
    referer = "https://www.pixiv.net/fanbox/creator/{0}".format(artist_id)

    req = mechanize.Request(url)
    req.add_header('Accept', 'application/json, text/plain, */*')
    req.add_header('Referer', referer)
    req.add_header('Origin', 'https://www.pixiv.net')
    req.add_header('User-Agent', self._config.useragent)

    response = self.open_with_retry(req).read()

    # Issue #420
    _tzInfo = None
    if self._config.useLocalTimezone:
        _tzInfo = PixivHelper.LocalUTCOffsetTimezone()

    result = FanboxArtist(artist_id, response, tzInfo=_tzInfo)

    pixivArtist = PixivArtist(artist_id)
    self.getMemberInfoWhitecube(artist_id, pixivArtist)
    result.artistName = pixivArtist.artistName
    result.artistToken = pixivArtist.artistToken

    return result
def getPropertyPins(streetName):
    url = r'https://taxcommissioner.dekalbcountyga.gov/TaxCommissioner/TCSearch.asp'
    request = mechanize.Request(url)
    response = mechanize.urlopen(request)
    forms = mechanize.ParseResponse(response, backwards_compat=False)
    response.close()
    form = forms[0]
    form['StreetName'] = sys.argv[1]
    propertyList = mechanize.urlopen(form.click()).read()
    tree = html.fromstring(propertyList)
    pins = tree.xpath('//tr/td[1]/a/@href')
    addresses = tree.xpath('//tr/td[1]/a/text()')
    pinList = []
    i = 0
    for pin in pins:
        # print pin
        newpin = pin.split('=')
        pinList.append([newpin[3], addresses[i]])
        print newpin[3] + '\t' + addresses[i]
        i = i + 1
    return pinList
def requestGetQuestao(self, id_questao):
    '''Fetches data from the API given a question id number.'''
    url_api = "http://intranet.unicesumar.edu.br/sistemas/bancoDeQuestoes/action/questaoAction.php"
    payload = {
        "action": "filtrar",
        "data[filtroJSON][idQuestao]": id_questao,
        "data[filtroJSON][temaAleatorio]": 0,
        "data[filtroJSON][tagAndOr]": "tagAnd",
        "data[filtroJSON][destinoAndOr]": "destinoAnd",
        "data[filtroJSON][tipoRequest]": "questaoListRequest"
    }
    try:
        data = parse.urlencode(payload)
        request_form_questao = mechanize.Request(url_api, data)
        response = self.br.open(request_form_questao)
        dados_questao = response.get_data().decode("latin1")
        resp = "Retorno: " + str(self.br.response().getcode()) + " -> " + str(
            self.br.response().geturl())
        logf = open(dir_path + "log.txt", "a+")
        logf.write(datetime.today().strftime("%d/%m/%Y, %H:%M:%S") + " - " +
                   str(resp) + "\n")
        logf.close()
        return dados_questao
    except Exception as e:
        now = datetime.now()
        logf = open(dir_path + "log.txt", "a+")
        logf.write(now.strftime("%d/%m/%Y, %H:%M:%S") + " - " + str(e) + "\n")
        logf.close()
def test_auth(self):
    """ Test Authentication Headers """
    # Setup
    port = 8001
    handler = ThreadingSimpleServer(('localhost', port), AuthHandler)
    with handler.obtain():
        url = "http://localhost:%s/" % (port, )
        username = '******'
        password = '******'
        b = mechanize.Browser()
        passman = mechanize.HTTPPasswordMgrWithDefaultRealm()
        passman.add_password(None, url, username, password)

        # other authentication handlers
        auth_digest = urllib_request.HTTPDigestAuthHandler(passman)
        auth_basic = urllib_request.HTTPBasicAuthHandler(passman)

        b.set_handle_robots(False)  # pylint: disable=no-member
        b.add_handler(auth_digest)  # pylint: disable=no-member
        b.add_handler(auth_basic)  # pylint: disable=no-member

        req = mechanize.Request(url)

        # Exercise
        b.open(req)

        # Verify
        assert b.response().code == 200
def getUrl(self, url):
    req = mechanize.Request(url)
    req.add_header(
        'User-Agent',
        ' Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3')
    response = mechanize.urlopen(req)
    link = response.read()
    response.close()
    return link