def dict_2_cookiejar(d):
    cj = CookieJar()
    for c in d:
        ck = Cookie(
            name=c["name"],
            value=urllib.parse.unquote(c["value"]),
            domain=c["domain"],
            path=c["path"],
            secure=c["secure"],
            rest={"HttpOnly": c["httponly"]},
            version=0,
            port=None,
            port_specified=False,
            domain_specified=False,
            domain_initial_dot=False,
            path_specified=True,
            expires=None,
            discard=True,
            comment=None,
            comment_url=None,
            rfc2109=False,
        )
        cj.set_cookie(ck)
    return cj
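# Usage sketch for dict_2_cookiejar above (not part of the original snippet).
# The input shape -- a list of dicts with name/value/domain/path/secure/httponly
# keys, e.g. cookies exported from a browser session -- is an assumption based
# on the keys the function reads.
exported = [
    {"name": "sessionid", "value": "abc%20123", "domain": "example.com",
     "path": "/", "secure": True, "httponly": True},
]
jar = dict_2_cookiejar(exported)
for cookie in jar:
    print(cookie.name, cookie.value, cookie.domain)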
def enviaPeticion(url, dominio, ruta, cookieDicc={"TESTID": "set"}):
    try:
        # Cookie container.
        jar = CookieJar()
        # Build a Request object for the upcoming request.
        peticion = urllib.request.Request(url=url)
        # crearCookie generates a cookie and adds it to the cookie jar.
        for key, item in cookieDicc.items():
            jar.set_cookie(crearCookie(key, item, dominio, ruta))
            # print(crearCookie(key, item))
        jar.add_cookie_header(peticion)
        # Send the request.
        edmundoDantes = urllib.request.build_opener()
        abreteSesamo = edmundoDantes.open(peticion)
        RiquezaYVenganza = verificacionAcceso(abreteSesamo)
        if RiquezaYVenganza:
            print("Busca tu propio Arbol")
        else:
            print("!(Busca tu propio arbol)")
        return RiquezaYVenganza
    except urllib.error.HTTPError as err:
        print("Pagina fuera de servicio")
        return "Pagina fuera de servicio"
def _getCookies(headers):
    cj = CookieJar()
    cookies = headers.split(',')
    for cookie in cookies:
        attrs = cookie.split(';')
        cookieNameValue = attrs[0].strip().split('=')
        pathNameValue = attrs[1].strip().split('=')
        ck = Cookie(version=1,
                    name=cookieNameValue[0],
                    value=cookieNameValue[1],
                    port=443,
                    port_specified=False,
                    domain=swaDomain,  # module-level domain value
                    domain_specified=True,
                    domain_initial_dot=False,
                    path=pathNameValue[1],
                    path_specified=True,
                    secure=True,
                    expires=None,
                    discard=True,
                    comment=None,
                    comment_url=None,
                    rest={'HttpOnly': None},
                    rfc2109=False)
        cj.set_cookie(ck)
    return cj
def __init__(self, filename, autoconnect=True, policy=None):
    experimental("Firefox3CookieJar is experimental code")
    CookieJar.__init__(self, policy)
    if filename is not None and not isinstance(filename, str):
        raise ValueError("filename must be string-like")
    self.filename = filename
    self._conn = None
    if autoconnect:
        self.connect()
class BuiltinBrowser(BaseBrowser):
    def __init__(self, base_url=None):
        base_url = get_default_workflowy_url(base_url)
        super().__init__(base_url)
        self.cookie_jar = CookieJar()
        self.opener = build_opener(HTTPCookieProcessor(self.cookie_jar))

    def open(self, url, *, _raw=False, _query=None, **kwargs):
        full_url = urljoin(self.base_url, url)
        if _query is not None:
            full_url += "?" + urlencode(_query)
        data = urlencode(kwargs).encode()
        headers = {
            "Content-Type": "application/x-www-form-urlencoded",
        }
        req = Request(full_url, data, headers)
        res = self.opener.open(req)
        with closing(res) as fp:
            content = fp.read()
        content = content.decode()
        if not _raw:
            # TODO: must not raise 404 error
            content = json.loads(content)
        return res, content

    def set_cookie(self, name, value):
        url = urlparse(self.base_url)
        cookie = Cookie(
            version=0,
            name=name,
            value=value,
            port=None,
            port_specified=False,
            domain=url.netloc,
            domain_specified=False,
            domain_initial_dot=False,
            path=url.path,
            path_specified=True,
            secure=False,
            expires=sys.maxsize,
            discard=False,
            comment=None,
            comment_url=None,
            rest={},
            rfc2109=False,
        )
        self.cookie_jar.set_cookie(cookie)
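# Brief usage sketch for BuiltinBrowser (illustrative only): the endpoint, query
# and cookie values are placeholders, and get_default_workflowy_url() is assumed
# to supply a base URL when none is passed.
browser = BuiltinBrowser()
browser.set_cookie('sessionid', 'abc123')                 # placeholder session cookie
res, payload = browser.open('some/endpoint', _query={'page': '1'})
print(res.getcode(), payload)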
def cookiejar_from_dict(cookie_dict, cookiejar=None):
    """Returns a CookieJar from a key/value dictionary.

    :param cookie_dict: Dict of key/values to insert into CookieJar.
    """
    if not isinstance(cookie_dict, CookieJar):
        if cookiejar is None:
            cookiejar = CookieJar()
        if cookie_dict is not None:
            for name in cookie_dict:
                cookiejar.set_cookie(create_cookie(name, cookie_dict[name]))
        return cookiejar
    else:
        return cookie_dict
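# Minimal usage sketch for cookiejar_from_dict; it assumes the create_cookie()
# helper referenced above is available in the same module, as in requests-style
# codebases. Values are placeholders.
jar = cookiejar_from_dict({"sessionid": "abc123", "csrftoken": "xyz"})
# Passing an existing CookieJar straight back through is a no-op:
assert cookiejar_from_dict(jar) is jar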
def _cookies_for_domain(self, domain, request):
    if not self._policy.domain_return_ok(domain, request):
        return []
    debug("Checking %s for cookies to return", domain)
    if self.delayload:
        self._delayload_domain(domain)
    return CookieJar._cookies_for_domain(self, domain, request)
def cookiesjar(self):
    """Returns cookie jar object"""
    if not self._cookies_jar:
        self._cookies_jar = CookieJar()
        # add cookies from self._cookies
    return self._cookies_jar
def cookiejar_from_dict(*cookie_dicts):
    """Returns a CookieJar built from one or more key/value dictionaries
    (or existing CookieJars), merged in order.

    :param cookie_dicts: Dicts of key/values (or CookieJars) to insert into the CookieJar.
    """
    cookie_dicts = tuple(d for d in cookie_dicts if d)
    if len(cookie_dicts) == 1 and isinstance(cookie_dicts[0], CookieJar):
        return cookie_dicts[0]
    cookiejar = CookieJar()
    for cookie_dict in cookie_dicts:
        if isinstance(cookie_dict, CookieJar):
            for cookie in cookie_dict:
                cookiejar.set_cookie(cookie)
        else:
            for name in cookie_dict:
                cookiejar.set_cookie(create_cookie(name, cookie_dict[name]))
    return cookiejar
def test_length(self):
    cookie_jar = CookieJar()
    policy = DeFactoCookiePolicy(cookie_jar=cookie_jar)
    cookie_jar.set_policy(policy)

    request = urllib.request.Request('http://example.com/')
    response = FakeResponse(
        ['Set-Cookie: k={0}'.format('a' * 400)],
        'http://example.com/'
    )
    cookie_jar.extract_cookies(response, request)
    print(cookie_jar._cookies)
    self.assertTrue(cookie_jar._cookies['example.com']['/'].get('k'))

    request = urllib.request.Request('http://example.com/')
    response = FakeResponse(
        ['Set-Cookie: k2={0}'.format('a' * 5000)],
        'http://example.com/'
    )
    cookie_jar.extract_cookies(response, request)
    self.assertFalse(cookie_jar._cookies['example.com']['/'].get('k2'))
def set_cookie(self, cookie):
    if cookie.discard:
        CookieJar.set_cookie(self, cookie)
        return

    def set_cookie(cur):
        # XXX
        # is this RFC 2965-correct?
        # could this do an UPDATE instead?
        row = self._row_from_cookie(cookie, cur)
        name, unused, domain, path = row[1:5]
        cur.execute("""\
DELETE FROM moz_cookies WHERE host = ? AND path = ? AND name = ?""",
                    (domain, path, name))
        cur.execute("""\
INSERT INTO moz_cookies VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
""", row)

    self._transaction(set_cookie)
def test_empty_value(self):
    cookie_jar = CookieJar()
    policy = DeFactoCookiePolicy(cookie_jar=cookie_jar)
    cookie_jar.set_policy(policy)

    request = urllib.request.Request('http://example.com/')
    response = FakeResponse(
        ['Set-Cookie: k'],
        'http://example.com/'
    )
    cookie_jar.extract_cookies(response, request)
    print(cookie_jar._cookies)
    self.assertTrue(cookie_jar._cookies.get('example.com'))
def clear(self, domain=None, path=None, name=None):
    CookieJar.clear(self, domain, path, name)
    where_parts = []
    sql_params = []
    if domain is not None:
        where_parts.append("host = ?")
        sql_params.append(domain)
    if path is not None:
        where_parts.append("path = ?")
        sql_params.append(path)
    if name is not None:
        where_parts.append("name = ?")
        sql_params.append(name)
    where = " AND ".join(where_parts)
    if where:
        where = " WHERE " + where

    def clear(cur):
        cur.execute("DELETE FROM moz_cookies%s" % where, tuple(sql_params))

    self._transaction(clear)
def __init__(self, username, password, bank=SWEDBANK):
    """Initialise client state and log in to the given bank."""
    self.data = ""
    self.pch = None
    self.authkey = None
    self.cj = CookieJar()
    self.profile = None
    self.account = None
    self.useragent = None
    self.bankid = None
    self.login(username, password, bank)
def _cookies_for_request(self, request):
    session_cookies = CookieJar._cookies_for_request(self, request)

    def get_cookies(cur):
        query = cur.execute("SELECT host from moz_cookies")
        domains = [row[0] for row in query.fetchall()]
        cookies = []
        for domain in domains:
            cookies += self._persistent_cookies_for_domain(domain, request, cur)
        return cookies

    persistent_cookies = self._transaction(get_cookies)
    return session_cookies + persistent_cookies
def __init__(self):
    self.cookiejar = CookieJar()
    self._cookie_processor = HTTPCookieProcessor(self.cookiejar)
    self.form = None
    self.url = "http://0.0.0.0:8080/"
    self.path = "/"
    self.status = None
    self.data = None
    self._response = None
    self._forms = None
def test_ascii(self):
    # Differences with FakeResponse:
    # On Python 3, MIME encoded-word syntax is used.
    # On Python 2, U backslash syntax is used but it's not decoded back.
    cookie_jar = CookieJar()
    policy = DeFactoCookiePolicy(cookie_jar=cookie_jar)
    cookie_jar.set_policy(policy)

    request = urllib.request.Request('http://example.com/')
    response = FakeResponse(
        ['Set-Cookie: k=🐍'],  # any non-ASCII value; the original character was mangled
        'http://example.com/'
    )
    cookie_jar.extract_cookies(response, request)
    print(cookie_jar._cookies)
    self.assertFalse(cookie_jar._cookies.get('example.com'))
def _authenticate(self, username, password):
    '''
    Authenticate to Google:
    1 - make a GET request to the Login webpage so we can get the login form
    2 - make a POST request with email, password and login form input values
    '''
    # Make sure we get CSV results in English
    ck1 = Cookie(version=0, name='I4SUserLocale', value='en_US', port=None,
                 port_specified=False, domain='.google.com', domain_specified=False,
                 domain_initial_dot=False, path='', path_specified=False, secure=False,
                 expires=None, discard=False, comment=None, comment_url=None, rest=None)

    # This cookie is now mandatory.
    # Not sure what the value represents, but too many queries from the same value
    # lead to a Quota Exceeded error.
    # random_six_char = ''.join(random.choice('0123456789abcdef') for n in xrange(6))
    ck2 = Cookie(version=0, name='PREF', value='0000', port=None,
                 port_specified=False, domain='.google.com', domain_specified=False,
                 domain_initial_dot=False, path='', path_specified=False, secure=False,
                 expires=None, discard=False, comment=None, comment_url=None, rest=None)

    self.cj = CookieJar()
    self.cj.set_cookie(ck1)
    self.cj.set_cookie(ck2)
    if not self.proxy:
        self.opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(self.cj))
    else:
        proxy = urllib.request.ProxyHandler({'http': self.proxy, 'https': self.proxy})
        self.opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(self.cj), proxy)
    self.opener.addheaders = self.headers

    # Get all of the login form input values
    find_inputs = etree.XPath("//form[@id='gaia_loginform']//input")
    resp = self.opener.open(self.url_login)
    data = self.read_gzipped_response(resp).decode()
    try:
        xmlTree = etree.fromstring(data, parser=html.HTMLParser(recover=True, remove_comments=True))
        for input in find_inputs(xmlTree):
            name = input.get('name')
            if name:
                name = name.encode('utf8')
                value = input.get('value', '').encode('utf8')
                self.login_params[name] = value
    except Exception:
        raise AuthFailedException("Exception while parsing: %s\n" % traceback.format_exc())

    self.login_params["Email".encode('utf8')] = username.encode('utf8')
    self.login_params["Passwd".encode('utf8')] = password.encode('utf8')

    params = urllib.parse.urlencode(self.login_params)
    auth_resp = self.opener.open(self.url_authenticate, params.encode())

    # Testing whether authentication was a success:
    # a correct auth sets a few cookies.
    if not self.is_authentication_successfull(auth_resp):
        raise AuthFailedException('Warning: Authentication failed for user %s' % username)
def __init__(self, config_name_name):
    cfg_file = open(config_name_name)
    self.config = config.Config(cfg_file)
    logging.info('Config file %s loaded!', config_name_name)
    self.cookie_jar = CookieJar()
    self.opener = build_opener(
        HTTPCookieProcessor(self.cookie_jar)
    )
    self.logged_ = False
    self.list_loaded_ = False
    self.api_url = 'http://' + self.config.api_domain
    self.shows_data = {}
    self.episodes_data = {}
    self.watched_data = {}
def __init__(self, **kwargs):
    """
    @param kwargs: Keyword arguments.
        - B{proxy} - An http proxy to be specified on requests.
            The proxy is defined as {protocol:proxy,}
                - type: I{dict}
                - default: {}
        - B{timeout} - Set the url open timeout (seconds).
                - type: I{float}
                - default: 90
    """
    Transport.__init__(self)
    Unskin(self.options).update(kwargs)
    self.cookiejar = CookieJar()
    self.proxy = {}
    self.urlopener = None
def __init__(self, host=None, apiurl='/w/api.php', timeout=100, srlimit=500,
             apfrom=None, aplimit=5000, bllimit=5000, aulimit=5000,
             aclimit=5000, rclimit=5000, lelimit=5000):
    if not host:
        raise Exception("host not defined")
    self.host = host
    self.apiurl = apiurl
    self.url = '%s%s' % (self.host, self.apiurl)
    self.format = 'json'
    self.cj = CookieJar()
    self.opener = urllib.request.build_opener(
        urllib.request.HTTPCookieProcessor(self.cj)
    )
    self.token = None
    self.defaults = {}
    self.defaults['srlimit'] = srlimit
    self.defaults['aplimit'] = aplimit
    self.defaults['aclimit'] = aclimit
    self.defaults['bllimit'] = bllimit
    self.defaults['rclimit'] = rclimit
    self.defaults['lelimit'] = lelimit
    self.srlimit = srlimit
    self.apfrom = apfrom
    self.aplimit = aplimit
    self.bllimit = bllimit
    self.aulimit = aulimit
    self.aclimit = aclimit
    self.rclimit = rclimit
    self.lelimit = lelimit
    self.search_info = {}
    self.aufinished = False
def test_domain_limit(self):
    cookie_jar = CookieJar()
    policy = DeFactoCookiePolicy(cookie_jar=cookie_jar)
    cookie_jar.set_policy(policy)

    request = urllib.request.Request('http://example.com/')

    for key in range(55):
        response = FakeResponse(
            ['Set-Cookie: k{0}=a'.format(key)],
            'http://example.com/'
        )
        cookie_jar.extract_cookies(response, request)

        if key < 50:
            self.assertTrue(
                cookie_jar._cookies['example.com']['/']
                .get('k{0}'.format(key))
            )
        else:
            self.assertFalse(
                cookie_jar._cookies['example.com']['/']
                .get('k{0}'.format(key))
            )

    response = FakeResponse(
        ['Set-Cookie: k3=b'],
        'http://example.com/'
    )
    cookie_jar.extract_cookies(response, request)

    self.assertEqual(
        'b',
        cookie_jar._cookies['example.com']['/']['k3'].value
    )
def testCookieAdapters(self):
    jar = CookieJar(policy=None)  # DefaultCookiePolicy()

    # set a cookie
    res = Response()
    tstval = str(uuid.uuid4())
    res.set_cookie("a-cookie", tstval, domain="example.com")
    cookies = jar.make_cookies(filters.ResponseCookieAdapter(res),
                               Request.blank("http://example.com"))
    for c in cookies:
        jar.set_cookie(c)

    self.assert_(len(jar), "where's my cookies?")
    self.assert_("a-cookie" in [c.name for c in jar],
                 "seriously, where's my cookie")

    # now put the header on the request please
    request = Request.blank("http://example.com")
    self.assert_(".example.com" in jar._cookies.keys(), jar._cookies.keys())
    jar.add_cookie_header(filters.RequestCookieAdapter(request))
    self.assert_("Cookie" in request.headers, (str(request), "Y NO COOKIES?"))
def get_json_data(self, url):
    opener = build_opener(HTTPCookieProcessor(CookieJar()))
    opener.addheaders.append(('Cookie', self.cookie))
    response = opener.open(url)
    return json.load(response)
import sys
sys.path.append('../')  # make the local `index` module importable

import unittest

from webtest import TestApp, TestResponse
from http.cookiejar import CookieJar

from index import application

test_app = TestApp(application, cookiejar=CookieJar())
test_response = TestResponse()

# os.environ['WEBTEST_TARGET_URL'] = 'http://localhost:8888'
# test_app.set_cookie('login', '2524')
# test_app.authorization()


class BlackBoxTest(unittest.TestCase):
    def setUp(self):
        test_app.post('/aluno_cadastro', dict(aluno_nome='eric', senha='idle'))
        test_app.post('/login', dict(usuario='eric', senha='idle'))

    def _fixaluno_turma(self):
        res = test_app.post('/aluno_cadastro', dict(aluno_nome='egg', senha='spam'))
        res2 = test_app.get('/turma_cadastro', dict(turma_nome='ni'))
        res3 = test_app.get('/turma_cadastro', dict(turma_nome='parrot'))
        # res4 = test_app.get('/cadastro_item', dict(nome='burro quando foge', tipo='3', preco='5'))

    def _test_index(self):
        """ the index of this server """
        res = test_app.get('/')
class Browser(object): def __init__(self): self.cookiejar = CookieJar() self._cookie_processor = HTTPCookieProcessor(self.cookiejar) self.form = None self.url = "http://0.0.0.0:8080/" self.path = "/" self.status = None self.data = None self._response = None self._forms = None @property def text(self): return self.data.decode('utf-8') def reset(self): """Clears all cookies and history.""" self.cookiejar.clear() def build_opener(self): """Builds the opener using (urllib2/urllib.request).build_opener. Subclasses can override this function to prodive custom openers. """ return urllib_build_opener() def do_request(self, req): if DEBUG: print('requesting', req.get_method(), req.get_full_url()) opener = self.build_opener() opener.add_handler(self._cookie_processor) try: self._response = opener.open(req) except HTTPError as e: self._response = e self.url = self._response.geturl() self.path = get_selector(Request(self.url)) self.data = self._response.read() self.status = self._response.code self._forms = None self.form = None return self.get_response() def open(self, url, data=None, headers={}): """Opens the specified url.""" url = urljoin(self.url, url) req = Request(url, data, headers) return self.do_request(req) def show(self): """Opens the current page in real web browser.""" f = open('page.html', 'w') f.write(self.data) f.close() url = 'file://' + os.path.abspath('page.html') webbrowser.open(url) def get_response(self): """Returns a copy of the current response.""" return addinfourl(BytesIO(self.data), self._response.info(), self._response.geturl()) def get_soup(self): """Returns beautiful soup of the current document.""" import BeautifulSoup return BeautifulSoup.BeautifulSoup(self.data) def get_text(self, e=None): """Returns content of e or the current document as plain text.""" e = e or self.get_soup() return ''.join([htmlunquote(c) for c in e.recursiveChildGenerator() if isinstance(c, text_type)]) def _get_links(self): soup = self.get_soup() return [a for a in soup.findAll(name='a')] def get_links(self, text=None, text_regex=None, url=None, url_regex=None, predicate=None): """Returns all links in the document.""" return self._filter_links(self._get_links(), text=text, text_regex=text_regex, url=url, url_regex=url_regex, predicate=predicate) def follow_link(self, link=None, text=None, text_regex=None, url=None, url_regex=None, predicate=None): if link is None: links = self._filter_links(self.get_links(), text=text, text_regex=text_regex, url=url, url_regex=url_regex, predicate=predicate) link = links and links[0] if link: return self.open(link['href']) else: raise BrowserError("No link found") def find_link(self, text=None, text_regex=None, url=None, url_regex=None, predicate=None): links = self._filter_links(self.get_links(), text=text, text_regex=text_regex, url=url, url_regex=url_regex, predicate=predicate) return links and links[0] or None def _filter_links(self, links, text=None, text_regex=None, url=None, url_regex=None, predicate=None): predicates = [] if text is not None: predicates.append(lambda link: link.string == text) if text_regex is not None: predicates.append(lambda link: re_compile(text_regex).search(link.string or '')) if url is not None: predicates.append(lambda link: link.get('href') == url) if url_regex is not None: predicates.append(lambda link: re_compile(url_regex).search(link.get('href', ''))) if predicate: predicate.append(predicate) def f(link): for p in predicates: if not p(link): return False return True return [link for link in links if f(link)] def 
get_forms(self): """Returns all forms in the current document. The returned form objects implement the ClientForm.HTMLForm interface. """ if self._forms is None: import ClientForm self._forms = ClientForm.ParseResponse(self.get_response(), backwards_compat=False) return self._forms def select_form(self, name=None, predicate=None, index=0): """Selects the specified form.""" forms = self.get_forms() if name is not None: forms = [f for f in forms if f.name == name] if predicate: forms = [f for f in forms if predicate(f)] if forms: self.form = forms[index] return self.form else: raise BrowserError("No form selected.") def submit(self, **kw): """submits the currently selected form.""" if self.form is None: raise BrowserError("No form selected.") req = self.form.click(**kw) return self.do_request(req) def __getitem__(self, key): return self.form[key] def __setitem__(self, key, value): self.form[key] = value
def download_url(self, url, referrer=None, user_agent=None, verbose=False): """Download a URL (likely http or RSS) from the web and return its contents as a str. Allow for possible vagaries like cookies, redirection, compression etc. """ if verbose: print("download_url", url, "referrer=", referrer, \ "user_agent", user_agent, file=sys.stderr) request = urllib.request.Request(url) # If we're after the single-page URL, we may need a referrer if referrer: if verbose: print("Adding referrer", referrer, file=sys.stderr) request.add_header('Referer', referrer) if not user_agent: user_agent = VersionString request.add_header('User-Agent', user_agent) if verbose: print("Using User-Agent of", user_agent, file=sys.stderr) # Allow for cookies in the request: some sites, notably nytimes.com, # degrade to an infinite redirect loop if cookies aren't enabled. cj = CookieJar() opener = urllib.request.build_opener( urllib.request.HTTPCookieProcessor(cj)) response = opener.open(request, timeout=100) # Lots of ways this can fail. # e.g. ValueError, "unknown url type" # or BadStatusLine: '' # At this point it would be lovely to check whether the # mime type is HTML or RSS. Unfortunately, all we have is a # httplib.HTTPMessage instance which is completely # undocumented (see http://bugs.python.org/issue3428). # It's not documented, but sometimes after urlopen # we can actually get a content type. If it's not # text/something, that's bad. ctype = response.headers['content-type'] if ctype and ctype != '' and not ctype.startswith("text") \ and not ctype.startswith("application/rss") \ and not ctype.startswith("application/xml") \ and not ctype.startswith("application/x-rss+xml") \ and not ctype.startswith("application/atom+xml"): print(url, "isn't text -- content-type was", \ ctype, ". Skipping.", file=sys.stderr) response.close() raise RuntimeError("Contents not text (%s)! %s" % (ctype, url)) # Were we redirected? geturl() will tell us that. self.cur_url = response.geturl() # but sadly, that means we need another request object # to parse out the host and prefix: real_request = urllib.request.Request(self.cur_url) real_request.add_header('User-Agent', user_agent) # A few sites, like http://nymag.com, gzip their http. # urllib2 doesn't handle that automatically: we have to ask for it. # But some other sites, like the LA Monitor, return bad content # if you ask for gzip. if self.config.getboolean(self.feedname, 'allow_gzip'): request.add_header('Accept-encoding', 'gzip') # feed() is going to need to know the host, to rewrite urls. # So save host and prefix based on any redirects we've had: # feedmeparser will need them. self.host = real_request.host self.prefix = real_request.type + '://' + self.host + '/' # urllib2 unfortunately doesn't read unicode, # so try to figure out the current encoding: if not self.encoding: if verbose: print( "download_url: self.encoding not set, getting it from headers", file=sys.stderr) self.encoding = response.headers.get_content_charset() enctype = response.headers['content-type'].split('charset=') if len(enctype) > 1: self.encoding = enctype[-1] else: if verbose: print("Defaulting to utf-8", file=sys.stderr) self.encoding = 'utf-8' if verbose: print("final encoding is", self.encoding, file=sys.stderr) # Is the URL gzipped? If so, we'll need to uncompress it. 
is_gzip = response.info().get('Content-Encoding') == 'gzip' # Read the content of the link: # This can die with socket.error, "connection reset by peer" # And it may not set html, so initialize it first: contents = None try: contents = response.read() # XXX Need to guard against IncompleteRead -- but what class owns it?? #except httplib.IncompleteRead, e: # print >>sys.stderr, "Ignoring IncompleteRead on", url except Exception as e: print("Unknown error from response.read()", url, file=sys.stderr) # contents can be undefined here. If so, no point in doing anything else. if not contents: print("Didn't read anything from response.read()", file=sys.stderr) response.close() raise NoContentError if is_gzip: buf = io.BytesIO(contents) f = gzip.GzipFile(fileobj=buf) contents = f.read() # No docs say I should close this. I can only assume. response.close() # response.read() returns bytes. Convert to str as soon as possible # so the rest of the program can work with str. return contents.decode(encoding=self.encoding)
class MyShowsRu(object): """ work with api.myshows.ru """ def __init__(self, config_name_name): cfg_file = file(config_name_name) self.config = config.Config(cfg_file) logging.info('Config file %s loaded!', config_name_name) self.cookie_jar = CookieJar() self.opener = build_opener( HTTPCookieProcessor(self.cookie_jar) ) self.logged_ = False self.list_loaded_ = False self.api_url = 'http://' + self.config.api_domain self.shows_data = {} self.episodes_data = {} self.watched_data = {} def do_login(self): """ authorization """ if self.logged_: return try: req_data = urllib.urlencode({ 'login': self.config.login.name, 'password': self.config.login.md5pass }) logging.debug( 'Login, url: %s%s, data: %s', self.api_url, self.config.url.login, req_data ) request = Request( self.api_url + self.config.url.login, req_data ) handle = self.opener.open(request) logging.debug('Login result: %s/%s', handle.headers, handle.read()) self.cookie_jar.clear( self.config.api_domain, '/', 'SiteUser[login]' ) self.cookie_jar.clear( self.config.api_domain, '/', 'SiteUser[password]' ) except HTTPError as ex: if ex.code == 403: stderr.write('Bad login name or password!\n') else: stderr.write('Login error!\n') logging.debug('HTTP error #%s: %s\n', ex.code, ex.read()) exit(1) except URLError as ex: stderr.write('Login error!\n') logging.debug('URLError - %s\n', ex.reason) exit(1) self.logged_ = True def load_shows(self): """ load user shows """ if self.list_loaded_: return if not self.logged_: self.do_login() logging.debug('Login: %s%s', self.api_url, self.config.url.list_shows) request = Request( self.api_url + self.config.url.list_shows ) handle = self.opener.open(request) self.shows_data = json.loads(handle.read()) self.list_loaded_ = True def list_all_shows(self): """ list all user shows """ self.load_shows() print() for show_id in sorted( self.shows_data, key=lambda show_id: self.shows_data[show_id]['title'] ): next_show = self.shows_data[show_id] if next_show['watchedEpisodes'] <= 0: show_sign = '-' elif next_show['watchedEpisodes'] < next_show['totalEpisodes']: show_sign = '+' else: show_sign = ' ' alias = self.alias_by_title(next_show['title']) if not alias: alias = '-' print('{0}{1}({7}): {2}/{3} ({4}%), rating = {5}({6})'.format( show_sign, tr_out(next_show['title']), # next_show['ruTitle'], next_show['watchedEpisodes'], next_show['totalEpisodes'], 100 * next_show['watchedEpisodes'] / next_show['totalEpisodes'], next_show['rating'], next_show['watchStatus'][0], alias )) print() def list_show(self, alias): """ list user show by alias """ re_m = re.match(r'^(.*\D)(\d{1,2}){0,1}$', alias) if not re_m: print('Bad format for list - "{0}"'.format(alias)) else: season = -1 if re_m.lastindex == 2: season = int(re_m.group(2)) show_id = self.id_by_title( self.title_by_alias(re_m.group(1), no_exit=True) ) epis = self.load_episodes(show_id) episodes = epis['episodes'] list_map = {} for epi_id in episodes: next_episode = episodes[epi_id] if season == -1 or next_episode['seasonNumber'] == season: list_map[ next_episode['seasonNumber'] * 1000 + next_episode['episodeNumber'] ] = next_episode watched = self.load_watched(show_id) current_season = -1 for epi_num in sorted(list_map.keys()): next_episode = list_map[epi_num] next_season = next_episode['seasonNumber'] if current_season != next_season: current_season = next_season print('{0} Season {1}:'.format( tr_out(epis['title']), current_season )) comment = '' epi_id = str(next_episode['id']) if epi_id in watched: comment = 'watched ' + watched[epi_id]['watchDate'] print(' "{0}" 
(s{1:02d}e{2:02d}) {3}'.format( tr_out(next_episode['title']), next_episode['seasonNumber'], next_episode['episodeNumber'], comment )) def list_shows(self, alias): """ list user shows """ if alias == 'all': self.list_all_shows() else: self.list_show(alias) def title_by_alias(self, query, no_exit=False): """ return show id by alias """ logging.debug('title_by_alias(%s)', query) alias = query.lower() if alias not in self.config.alias: logging.debug('Unknown alias - "%s"', alias) if no_exit: print('Cannot find alias "{0}", will try it as title!'.format(query)) return query else: print('Unknown alias - {0}'.format(query)) exit(1) else: logging.debug('title_by_alias(%s) = %s', query, self.config.alias[alias]) return self.config.alias[alias] def alias_by_title(self, title): """ return show alias by title """ logging.debug('alias_by_title(%s)', title) for alias, a_title in self.config.alias.iteritems(): if a_title == title: return alias return '' def id_by_title(self, title): """ return show id by title """ logging.debug('id_by_title(%s)', title) if not self.list_loaded_: self.load_shows() for show_id in self.shows_data: next_show = self.shows_data[show_id] logging.debug('id_by_title(%s) = %s', next_show['title'], show_id) if next_show['title'] == title: logging.debug('Found id_by_title(%s) = %s', title, show_id) return show_id print('Unknown title - {0}'.format(title)) exit(1) def load_episodes(self, show_id): """ load episode data by show id """ if not self.logged_: self.do_login() if show_id not in self.episodes_data: logging.debug( 'Load episodes: %s%s', self.api_url, self.config.url.list_episodes.format(show_id) ) request = Request( self.api_url + self.config.url.list_episodes.format(show_id) ) handle = self.opener.open(request) self.episodes_data[show_id] = json.loads(handle.read()) return self.episodes_data[show_id] def load_watched(self, show_id): """ load watched data by show id """ if not self.logged_: self.do_login() if show_id not in self.watched_data: logging.debug( 'Load watched: %s%s', self.api_url, self.config.url.list_watched.format(show_id) ) request = Request( self.api_url + self.config.url.list_watched.format(show_id) ) handle = self.opener.open(request) self.watched_data[show_id] = json.loads(handle.read()) return self.watched_data[show_id] def get_last_watched(self, show_id): """ return last watched episode id for show id """ logging.debug('Searching last watched for show %s', show_id) episodes = self.load_episodes(show_id)['episodes'] watched = self.load_watched(show_id) last_number = 0 episode_id = None for epi_id in watched: logging.debug('Next watched id: %s', epi_id) next_episode = episodes[epi_id] logging.debug( 'Trying next episode: %s - %s (id: %s/seq: %s)', next_episode['shortName'], next_episode['title'], next_episode['id'], next_episode['sequenceNumber'] ) if last_number < next_episode['sequenceNumber']: last_number = next_episode['sequenceNumber'] episode_id = epi_id logging.debug( 'Saved next last episode: %s - %s (id: %s)', next_episode['shortName'], next_episode['title'], episode_id ) logging.debug('Found last watched %s', episode_id) return episode_id def show_last_watched_by_alias(self, alias): """ show last watched episode for alias """ show_id = self.id_by_title(self.title_by_alias(alias, no_exit=True)) epis = self.load_episodes(show_id) watched = self.load_watched(show_id) episode_id = self.get_last_watched(show_id) print() if episode_id is None: print('{0} is unwatched'.format(tr_out(epis['title']))) else: episode = epis['episodes'][episode_id] 
print('Last for {0} is s{1:02d}e{2:02d} ("{3}") at {4}'.format( tr_out(epis['title']), episode['seasonNumber'], episode['episodeNumber'], tr_out(episode['title']), watched[episode_id]['watchDate'] )) print() def show_last_watched_by_date(self, alias): """ show last watched episode(s) for date """ date_to = datetime.date.today() if alias == 'day': date_from = date_to + datetime.timedelta(days=-1) elif alias == 'week': date_from = date_to + datetime.timedelta(days=-7) elif alias == 'month': prev_month = date_to.replace(day=1) + datetime.timedelta(days=-1) date_from = date_to + datetime.timedelta(days=-prev_month.day) else: print('Unknown alias - {0}'.format(alias)) exit(1) self.load_shows() print() print('Watched from {0} to {1}'.format( date_from.strftime('%Y-%m-%d'), date_to.strftime('%Y-%m-%d') )) print() re_c = re.compile(r'(\d{1,2})\.(\d{1,2})\.(\d{4})') count = 0 for show_id in self.shows_data: next_show = self.shows_data[show_id] if next_show['watchedEpisodes'] <= 0: continue watched = self.load_watched(next_show['showId']) epis = None last_map = {} for epi_id in watched: next_episode = watched[epi_id] re_m = re_c.match(next_episode['watchDate']) if not re_m: print('Warning: unknown date format - {0}'.format( next_episode['watchDate'])) continue dtv = [int(s) for s in re_m.group(3, 2, 1)] epi_date = datetime.date(dtv[0], dtv[1], dtv[2]) if date_from <= epi_date and epi_date <= date_to: if not epis: epis = self.load_episodes(show_id) count += 1 if epi_id not in epis['episodes']: print('Episode not found: {0}'.format(epi_id)) logging.debug('Episodes:') logging.debug(epis) continue else: episode = epis['episodes'][epi_id] date_key = epi_date.toordinal() * 1000\ + episode['seasonNumber'] * 10\ + episode['episodeNumber'] last_map[date_key] = episode for date_key in sorted(last_map.keys()): episode = last_map[date_key] print('{0} s{1:02d}e{2:02d} "{3}" at {4}'.format( tr_out(epis['title']), episode['seasonNumber'], episode['episodeNumber'], tr_out(episode['title']), watched[str(episode['id'])]['watchDate'] )) print() print('Total count: {0}'.format(count)) print() def show_last_watched(self, query): """ show last watched episode(s) """ alias = query.lower() if alias in ['day', 'week', 'month']: self.show_last_watched_by_date(alias) else: self.show_last_watched_by_alias(query) def get_first_unwatched(self, show_id): """ return first unwathced episode for show id """ logging.debug('Searching first unwatched for show %s', show_id) episodes = self.load_episodes(show_id)['episodes'] last_watched = self.get_last_watched(show_id) if last_watched is None: last_watched = 0 else: logging.debug( 'Last watched is: %s - %s (%s)', episodes[last_watched]['shortName'], episodes[last_watched]['title'], episodes[last_watched]['sequenceNumber'] ) last_watched = episodes[last_watched]['sequenceNumber'] logging.debug('Last watched: %s', last_watched) episode_id = None first_unwatched = None for epi_id in episodes: next_episode = episodes[epi_id] logging.debug( 'Trying next episode: %s - %s (%s)', next_episode['shortName'], next_episode['title'], next_episode['sequenceNumber'] ) if ( (first_unwatched > next_episode['sequenceNumber'] or not first_unwatched) and last_watched < next_episode['sequenceNumber'] ): # first_unwatched = next_episode['sequenceNumber'] episode_id = epi_id logging.debug( 'Saved next last unwatched: %s - %s (%s)', next_episode['shortName'], next_episode['title'], next_episode['sequenceNumber'] ) return episode_id def show_next_for_watch(self, alias): """ show next episode for watch for alias 
""" show_id = self.id_by_title(self.title_by_alias(alias, no_exit=True)) epis = self.load_episodes(show_id) episode_id = self.get_first_unwatched(show_id) if episode_id is None: print("\nCannot find first watch for {0}\n".format(tr_out(epis['title']))) else: episode = epis['episodes'][episode_id] print('\nFirst watch for {0} is s{1:02d}e{2:02d} ("{3}")\n'.format( tr_out(epis['title']), episode['seasonNumber'], episode['episodeNumber'], tr_out(episode['title']), )) def set_episode_check(self, alias, epi, check): """ set epi episode as watched """ re_m = re.match(r's(\d{1,2})e(\d{1,2})', epi.lower()) if not re_m: print('Bad format for check - "{0}"'.format(epi)) else: season = int(re_m.group(1)) episode = int(re_m.group(2)) show_id = self.id_by_title(self.title_by_alias(alias, no_exit=True)) epis = self.load_episodes(show_id) watched = self.load_watched(show_id) episodes = epis['episodes'] for epi_id in episodes: next_episode = episodes[epi_id] if ( next_episode['seasonNumber'] == season and next_episode['episodeNumber'] == episode ): valid_op = False old_date = '' if check: msg = 'checked' if epi_id in watched: old_date = watched[epi_id]['watchDate'] else: url = self.config.url.check_episode.format(epi_id) valid_op = True else: msg = 'unchecked' if epi_id in watched: url = self.config.url.uncheck_episode.format(epi_id) valid_op = True if not valid_op: print() print('Episode "{0}" (s{1:02d}e{2:02d}) of "{3}" already {4} {5}'\ .format( tr_out(next_episode['title']), next_episode['seasonNumber'], next_episode['episodeNumber'], tr_out(epis['title']), msg, old_date )) else: logging.debug('Set checked: %s%s', self.api_url, url) request = Request(self.api_url + url) self.opener.open(request) print() print( 'Episode "{0}" (s{1:02d}e{2:02d}) of "{3}" set {4}'\ .format( tr_out(next_episode['title']), next_episode['seasonNumber'], next_episode['episodeNumber'], tr_out(epis['title']), msg )) break def search_show(self, query): """ search show """ if not self.logged_: self.do_login() req_data = urllib.urlencode({ 'q': query, }) logging.debug( 'Search url/data: %s%s%s', self.api_url, self.config.url.search, req_data ) request = Request( self.api_url + self.config.url.search, req_data ) handle = self.opener.open(request) search_result = json.loads(handle.read()) logging.debug('Search result: %s', search_result) return search_result def show_search_result(self, query): """ show search result """ search_result = self.search_show(query) print() for show_id in search_result: show = search_result[show_id] print('"{1}", started: {2} (id={0})'.format( show_id, tr_out(show['title']), show['started'] )) print() def set_show_status(self, alias, status, accurate): """ set show status """ title = self.title_by_alias(alias, no_exit=True) search_result = self.search_show(title) for show_id in search_result: show = search_result[show_id] if accurate and show['title'] != alias: continue url = self.config.url.status.format(show['id'], status) logging.debug('Set show status: %s%s', self.api_url, url) request = Request(self.api_url + url) self.opener.open(request) print('Show "{0}" status set to {1}'.format( tr_out(show['title']), status )) print()
def __init__(self, parent, acc): self._acc = acc self._gmail_cookies_key = '' self._gmail_cookies = CookieJar() builder = Gtk.Builder() builder.set_translation_domain(PACKAGE_NAME) builder.add_from_file(get_data_file("account_widget.ui")) builder.connect_signals({ "account_type_changed": self._on_cmb_account_type_changed, "entry_changed": self._on_entry_changed, "expander_folders_activate": self._on_expander_folders_activate, "password_info_icon_released": self._on_password_info_icon_released, "log_in": self._on_log_in, "gmail_label_add": self._on_gmail_label_add, "gmail_label_remove": self._on_gmail_label_remove, "gmail_label_edited": self._on_gmail_label_edited, "gmail_label_mode_edited": self._on_gmail_label_mode_edited, "gmail_labels_selection_changed": self._on_gmail_labels_selection_changed, }) self._window = Gtk.Dialog(title = _('Mail Account'), parent = parent, use_header_bar = True, \ buttons = (Gtk.STOCK_CANCEL, Gtk.ResponseType.CANCEL, Gtk.STOCK_OK, Gtk.ResponseType.OK)) self._window.set_default_response(Gtk.ResponseType.OK) self._window.set_default_size(400, 0) self._box = self._window.get_content_area() self._box.set_border_width(12) self._box.set_spacing(12) self._box.pack_start(builder.get_object("account_widget"), True, True, 0) self._cmb_account_type = builder.get_object("cmb_account_type") self._label_log_in = builder.get_object("label_log_in") self._label_gmail_labels = builder.get_object("label_gmail_labels") self._box_gmail_labels = builder.get_object("box_gmail_labels") self._button_log_in = builder.get_object("button_log_in") self._button_remove_gmail_label = builder.get_object( "button_remove_gmail_label") self._label_account_name = builder.get_object("label_account_name") self._entry_account_name = builder.get_object("entry_account_name") self._label_account_user = builder.get_object("label_account_user") self._entry_account_user = builder.get_object("entry_account_user") self._label_account_password = builder.get_object( "label_account_password") self._entry_account_password = builder.get_object( "entry_account_password") self._label_account_server = builder.get_object("label_account_server") self._entry_account_server = builder.get_object("entry_account_server") self._label_account_port = builder.get_object("label_account_port") self._entry_account_port = builder.get_object("entry_account_port") self._label_account_file_path = builder.get_object( "label_account_file_path") self._chooser_account_file_path = builder.get_object( "chooser_file_path") self._label_account_directory_path = builder.get_object( "label_account_directory_path") self._chooser_account_directory_path = builder.get_object( "chooser_directory_path") self._expander_folders = builder.get_object("expander_folders") self._overlay = builder.get_object("overlay") self._treeview_folders = builder.get_object("treeview_folders") self._liststore_folders = builder.get_object("liststore_folders") self._liststore_gmail_labels = builder.get_object( "liststore_gmail_labels") self._treeview_gmail_labels = builder.get_object( "treeview_gmail_labels") self._chk_account_push = builder.get_object("chk_account_push") self._chk_account_ssl = builder.get_object("chk_account_ssl") self._button_ok = self._window.get_widget_for_response( Gtk.ResponseType.OK) self._button_ok.set_sensitive(False) self._error_label = None self._folders_received = False self._selected_folder_count = 0 self._pwd_info_icon = self._entry_account_password.get_icon_name( Gtk.EntryIconPosition.SECONDARY) 
self._entry_account_port.set_placeholder_text(_("optional")) renderer_folders_enabled = Gtk.CellRendererToggle() renderer_folders_enabled.connect("toggled", self._on_folder_toggled) column_folders_enabled = Gtk.TreeViewColumn(_('Enabled'), renderer_folders_enabled) column_folders_enabled.add_attribute(renderer_folders_enabled, "active", 0) column_folders_enabled.set_alignment(0.5) self._treeview_folders.append_column(column_folders_enabled) renderer_folders_name = Gtk.CellRendererText() column_folders_name = Gtk.TreeViewColumn(_('Name'), renderer_folders_name, text=1) column_folders_name.set_cell_data_func(renderer_folders_name, folder_cell_data) self._treeview_folders.append_column(column_folders_name) self._on_gmail_labels_selection_changed( self._treeview_gmail_labels.get_selection())
def s1_download(argument_list): '''Function to download a single Sentinel-1 product from Copernicus scihub This function will download S1 products from ESA's apihub. Args: argument_list: a list with 4 entries (this is used to enable parallel execution) argument_list[0] is the product's uuid argument_list[1] is the local path for the download argument_list[2] is the username of Copernicus' scihub argument_list[3] is the password of Copernicus' scihub ''' # get out the arguments uuid = argument_list[0] filename = argument_list[1] uname = argument_list[2] pword = argument_list[3] # ask for username and password in case you have not defined as input if not uname: logger.debug('If you do not have a Copernicus Scihub user' ' account go to: https://scihub.copernicus.eu') uname = input(' Your Copernicus Scihub Username:'******' Your Copernicus Scihub Password:'******'https://scihub.copernicus.eu/apihub/odata/v1/' 'Products(\'{}\')/$value'.format(uuid)) # get first response for file Size response = requests.get(url, stream=True, auth=(uname, pword)) # check response if response.status_code == 401: raise ValueError(' ERROR: Username/Password are incorrect.') elif response.status_code == 404: logger.debug('Product %s missing from the archive, continuing.', filename.split('/')[-1]) return filename.split('/')[-1] elif response.status_code != 200: logger.debug( 'ERROR: Something went wrong, will try again in 30 seconds.') response.raise_for_status() password_manager = urlreq.HTTPPasswordMgrWithDefaultRealm() password_manager.add_password(None, "https://scihub.copernicus.eu/apihub/", uname, pword) cookie_jar = CookieJar() opener = urlreq.build_opener(urlreq.HTTPBasicAuthHandler(password_manager), urlreq.HTTPCookieProcessor(cookie_jar)) urlreq.install_opener(opener) # get download size total_length = int(response.headers.get('content-length', 0)) # check if file is partially downloaded if os.path.exists(filename): first_byte = os.path.getsize(filename) else: first_byte = 0 if first_byte >= total_length: return str('{}.downloaded'.format(filename)) zip_test = 1 while zip_test is not None and zip_test <= 10: logger.debug('INFO: Downloading scene to: {}'.format(filename)) with TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=url.split('/')[-1]) as t: filename, headers = urlreq.urlretrieve(url, filename=filename, reporthook=t.update_to) # zipFile check logger.debug( 'INFO: Checking the zip archive of {} for inconsistency'.format( filename)) zip_test = h.check_zipfile(filename) # if it did not pass the test, remove the file # in the while loop it will be downlaoded again if zip_test is not None: logger.debug('INFO: {} did not pass the zip test. \ Re-downloading the full scene.'.format(filename)) os.remove(filename) first_byte = 0 # otherwise we change the status to True else: logger.debug('INFO: {} passed the zip test.'.format(filename)) with open(str('{}.downloaded'.format(filename)), 'w') as file: file.write('successfully downloaded \n') return str('{}.downloaded'.format(filename))
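# Hedged call sketch for s1_download() based on the argument_list layout its
# docstring describes; the uuid, download path and credentials below are
# placeholders. In practice one such list is built per scene and handed to a
# worker pool, which is why the arguments travel as a single list.
args = ['0a1b2c3d-0000-0000-0000-000000000000',   # product uuid (placeholder)
        '/tmp/S1A_scene.zip',                     # local download path
        'my_scihub_user', 'my_scihub_password']
marker_file = s1_download(args)
print(marker_file)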
def get(self, options, url): cj = CookieJar() match = re.search(".*video/([0-9]+)", url) if not match: log.error("Can't find video file") sys.exit(2) video_id = match.group(1) if options.username and options.password: #bogus cc = Cookie(None, 'asdf', None, '80', '80', 'www.kanal5play.se', None, None, '/', None, False, False, 'TestCookie', None, None, None) cj.set_cookie(cc) #get session cookie data = get_http_data("http://www.kanal5play.se/", cookiejar=cj) authurl = "https://kanal5swe.appspot.com/api/user/login?callback=jQuery171029989&email=%s&password=%s&_=136250" % (options.username, options.password) data = get_http_data(authurl) match = re.search("({.*})\);", data) jsondata = json.loads(match.group(1)) if jsondata["success"] == False: log.error(jsondata["message"]) sys.exit(2) authToken = jsondata["userData"]["auth"] cc = Cookie(version=0, name='authToken', value=authToken, port=None, port_specified=False, domain='www.kanal5play.se', domain_specified=True, domain_initial_dot=True, path='/', path_specified=True, secure=False, expires=None, discard=True, comment=None, comment_url=None, rest={'HttpOnly': None}) cj.set_cookie(cc) format = "FLASH" if options.hls: format = "IPHONE" url = "http://www.kanal5play.se/api/getVideo?format=%s&videoId=%s" % (format, video_id) data = json.loads(get_http_data(url, cookiejar=cj)) options.live = data["isLive"] if data["hasSubtitle"]: subtitle = "http://www.kanal5play.se/api/subtitles/%s" % video_id if options.hls: url = data["streams"][0]["source"] baseurl = url[0:url.rfind("/")] if data["streams"][0]["drmProtected"]: log.error("We cant download drm files for this site.") sys.exit(2) download_hls(options, url, baseurl) else: steambaseurl = data["streamBaseUrl"] streams = {} for i in data["streams"]: stream = {} stream["source"] = i["source"] streams[int(i["bitrate"])] = stream test = select_quality(options, streams) filename = test["source"] match = re.search("^(.*):", filename) options.other = "-W %s -y %s " % ("http://www.kanal5play.se/flash/K5StandardPlayer.swf", filename) download_rtmp(options, steambaseurl) if options.subtitle: if options.output != "-": data = get_http_data(subtitle, cookiejar=cj) subtitle_json(options, data)
from urllib.request import Request, build_opener, HTTPCookieProcessor
from http.cookiejar import CookieJar
from urllib import parse

cookie_jar = CookieJar()
handler = HTTPCookieProcessor(cookiejar=cookie_jar)
opener = build_opener(handler)

url = 'http://www.renren.com/PLogin.do'
data = {'email': "*****@*****.**", 'password': '******'}
data = parse.urlencode(data).encode()

request = Request(url, data=data)
request.add_header('User-Agent',
                   'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                   '(KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36')

response = opener.open(request)
print(response.read().decode())
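# After a form login like the one above, the session cookies live in cookie_jar
# and ride along automatically on any later request made through the same
# opener. A small follow-up sketch; the second URL is a placeholder.
for cookie in cookie_jar:
    print(cookie.name, '=', cookie.value, 'for', cookie.domain)

profile_page = opener.open('http://www.renren.com/home')   # placeholder follow-up URL
print(profile_page.getcode())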
def init_basicauth(config, config_mtime): """initialize urllib2 with the credentials for Basic Authentication""" def filterhdrs(meth, ishdr, *hdrs): # this is so ugly but httplib doesn't use # a logger object or such def new_method(self, *args, **kwargs): # check if this is a recursive call (note: we do not # have to care about thread safety) is_rec_call = getattr(self, '_orig_stdout', None) is not None try: if not is_rec_call: self._orig_stdout = sys.stdout sys.stdout = StringIO() meth(self, *args, **kwargs) hdr = sys.stdout.getvalue() finally: # restore original stdout if not is_rec_call: sys.stdout = self._orig_stdout del self._orig_stdout for i in hdrs: if ishdr: hdr = re.sub(r'%s:[^\\r]*\\r\\n' % i, '', hdr) else: hdr = re.sub(i, '', hdr) sys.stdout.write(hdr) new_method.__name__ = meth.__name__ return new_method if config['http_debug'] and not config['http_full_debug']: HTTPConnection.send = filterhdrs(HTTPConnection.send, True, 'Cookie', 'Authorization') HTTPResponse.begin = filterhdrs(HTTPResponse.begin, False, 'header: Set-Cookie.*\n') if sys.version_info < (2, 6): # HTTPS proxy is not supported in old urllib2. It only leads to an error # or, at best, a warning. if 'https_proxy' in os.environ: del os.environ['https_proxy'] if 'HTTPS_PROXY' in os.environ: del os.environ['HTTPS_PROXY'] if config['http_debug']: # brute force def urllib2_debug_init(self, debuglevel=0): self._debuglevel = 1 AbstractHTTPHandler.__init__ = urllib2_debug_init cookie_file = os.path.expanduser(config['cookiejar']) global cookiejar cookiejar = LWPCookieJar(cookie_file) try: cookiejar.load(ignore_discard=True) if int(round(config_mtime)) > int(os.stat(cookie_file).st_mtime): cookiejar.clear() cookiejar.save() except IOError: try: fd = os.open(cookie_file, os.O_CREAT | os.O_WRONLY | os.O_TRUNC, 0o600) os.close(fd) except IOError: # hmm is any good reason why we should catch the IOError? #print 'Unable to create cookiejar file: \'%s\'. Using RAM-based cookies.' % cookie_file cookiejar = CookieJar()
def download_tile(tile, url, pid, srtmv3, one, username, password):
    grass.debug("Download tile: %s" % tile, debug=1)
    output = tile + ".r.in.srtm.tmp." + str(pid)
    if srtmv3:
        if one:
            local_tile = str(tile) + ".SRTMGL1.hgt.zip"
        else:
            local_tile = str(tile) + ".SRTMGL3.hgt.zip"
    else:
        local_tile = str(tile) + ".hgt.zip"

    urllib2.urlcleanup()

    if srtmv3:
        remote_tile = str(url) + local_tile
        goturl = 1
        try:
            password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
            password_manager.add_password(None, "https://urs.earthdata.nasa.gov",
                                          username, password)
            cookie_jar = CookieJar()
            opener = urllib2.build_opener(
                urllib2.HTTPBasicAuthHandler(password_manager),
                # urllib2.HTTPHandler(debuglevel=1),   # Uncomment these two lines to see
                # urllib2.HTTPSHandler(debuglevel=1),  # details of the requests/responses
                urllib2.HTTPCookieProcessor(cookie_jar),
            )
            urllib2.install_opener(opener)
            request = urllib2.Request(remote_tile)
            response = urllib2.urlopen(request)
            fo = open(local_tile, "w+b")
            fo.write(response.read())
            fo.close()
            time.sleep(0.5)
        except Exception:
            goturl = 0
        return goturl

    # SRTM subdirs: Africa, Australia, Eurasia, Islands, North_America, South_America
    for srtmdir in (
        "Africa",
        "Australia",
        "Eurasia",
        "Islands",
        "North_America",
        "South_America",
    ):
        remote_tile = str(url) + str(srtmdir) + "/" + local_tile
        goturl = 1
        try:
            request = urllib2.Request(remote_tile)  # build the request for this mirror
            response = urllib2.urlopen(request)
            fo = open(local_tile, "w+b")
            fo.write(response.read())
            fo.close()
            time.sleep(0.5)
            # does not work:
            # urllib.urlretrieve(remote_tile, local_tile, data = None)
        except Exception:
            goturl = 0
        if goturl == 1:
            return 1
    return 0
def __init__(self):
    self.cookie_jar = CookieJar()
    self.opener = build_opener(HTTPCookieProcessor(self.cookie_jar))
def get_cookie_jar():
    if not hasattr(get_cookie_jar, 'memo'):
        get_cookie_jar.memo = CookieJar()
    return get_cookie_jar.memo
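# The function attribute above acts as a lazily created process-wide singleton,
# so every caller shares one jar. A small sketch of wiring it into an opener;
# the shared_opener name is illustrative.
from urllib.request import build_opener, HTTPCookieProcessor

shared_opener = build_opener(HTTPCookieProcessor(get_cookie_jar()))
assert get_cookie_jar() is get_cookie_jar()   # always the same jar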
def process_request(self, request, spider):
    # Reuse (or lazily create) a per-request cookie jar stored in request.meta.
    cookie_jar = request.meta.setdefault('cookie_jar', CookieJar())
    # Note: CookieJar.extract_cookies() is documented as (response, request);
    # here the request object is passed in both positions.
    cookie_jar.extract_cookies(request, request)
    request.meta['cookie_jar'] = cookie_jar
    request.meta['dont_merge_cookies'] = True
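# For reference, a minimal stdlib sketch of the (response, request) order that
# http.cookiejar.CookieJar.extract_cookies() expects; the URLs are placeholders.
import urllib.request
from http.cookiejar import CookieJar

jar = CookieJar()
req = urllib.request.Request('http://example.com/')
resp = urllib.request.urlopen(req)            # any urllib response object works here
jar.extract_cookies(resp, req)                # harvest Set-Cookie headers from the response
next_req = urllib.request.Request('http://example.com/next')
jar.add_cookie_header(next_req)               # attach a Cookie header to the next request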
class FoggyCam(object): """FoggyCam client class that performs capture operations.""" nest_username = '' nest_password = '' nest_user_id = '' nest_access_token = '' nest_access_token_expiration = '' nest_current_user = None nest_session_url = 'https://home.nest.com/session' nest_user_url = 'https://home.nest.com/api/0.1/user/#USERID#/app_launch' nest_api_login_url = 'https://webapi.camera.home.nest.com/api/v1/login.login_nest' nest_image_url = 'https://nexusapi-us1.camera.home.nest.com/get_image?uuid=#CAMERAID#&width=#WIDTH#&cachebuster=#CBUSTER#' nest_verify_pin_url = 'https://home.nest.com/api/0.1/2fa/verify_pin' nest_user_request_payload = { "known_bucket_types": ["quartz"], "known_bucket_versions": [] } nest_camera_array = [] nest_camera_buffer_threshold = 50 is_capturing = False cookie_jar = None merlin = None temp_dir_path = '' local_path = '' def __init__(self, username, password): self.nest_password = password self.nest_username = username self.cookie_jar = CookieJar() self.merlin = urllib.request.build_opener( urllib.request.HTTPCookieProcessor(self.cookie_jar)) if not os.path.exists('_temp'): os.makedirs('_temp') self.local_path = os.path.dirname(os.path.abspath(__file__)) self.temp_dir_path = os.path.join(self.local_path, '_temp') # It's important to try and load the cookies first to check # if we can avoid logging in. try: self.unpickle_cookies() utc_date = datetime.utcnow() utc_millis_str = str(int(utc_date.timestamp()) * 1000) self.initialize_twof_session(utc_millis_str) except: print("Failed to re-use the cookies. Re-initializing session...") self.initialize_session() self.login() self.initialize_user() def unpickle_cookies(self): """Get local cookies and load them into the cookie jar.""" print("Unpickling cookies...") with open("cookies.bin", 'rb') as f: pickled_cookies = pickle.load(f) for pickled_cookie in pickled_cookies: self.cookie_jar.set_cookie(pickled_cookie) cookie_data = dict( (cookie.name, cookie.value) for cookie in self.cookie_jar) self.nest_access_token = cookie_data["cztoken"] def pickle_cookies(self): """Store the cookies locally to reduce auth calls.""" print("Pickling cookies...") pickle.dump([c for c in self.cookie_jar], open("cookies.bin", "wb")) def initialize_twof_session(self, time_token): """Creates the first session to get the access token and cookie, with 2FA enabled.""" print("Intializing 2FA session...") target_url = self.nest_session_url + "?=_" + time_token print(target_url) try: request = urllib.request.Request(target_url) request.add_header('Authorization', 'Basic %s' % self.nest_access_token) response = self.merlin.open(request) session_data = response.read() session_json = json.loads(session_data) self.nest_access_token = session_json['access_token'] self.nest_access_token_expiration = session_json['expires_in'] self.nest_user_id = session_json['userid'] self.pickle_cookies() except urllib.request.HTTPError as err: print(err) def initialize_session(self): """Creates the first session to get the access token and cookie.""" print('INFO: Initializing session...') payload = {'email': self.nest_username, 'password': self.nest_password} binary_data = json.dumps(payload).encode('utf-8') request = urllib.request.Request(self.nest_session_url, binary_data) request.add_header('Content-Type', 'application/json') try: response = self.merlin.open(request) session_data = response.read() session_json = json.loads(session_data) self.nest_access_token = session_json['access_token'] self.nest_access_token_expiration = session_json['expires_in'] 
self.nest_user_id = session_json['userid'] print('INFO: [PARSED] Captured authentication token:') print(self.nest_access_token) print('INFO: [PARSED] Captured expiration date for token:') print(self.nest_access_token_expiration) cookie_data = dict( (cookie.name, cookie.value) for cookie in self.cookie_jar) for cookie in cookie_data: print(cookie) print('INFO: [COOKIE] Captured authentication token:') print(cookie_data["cztoken"]) except urllib.request.HTTPError as err: if err.code == 401: error_message = err.read() unauth_content = json.loads(error_message) if unauth_content["status"].lower() == "verification_pending": print("Pending 2FA verification!") two_factor_token = unauth_content["2fa_token"] phone_truncated = unauth_content["truncated_phone_number"] print("Enter PIN you just received on number ending with", phone_truncated) pin = input() payload = {"pin": pin, "2fa_token": two_factor_token} binary_data = json.dumps(payload).encode('utf-8') request = urllib.request.Request(self.nest_verify_pin_url, binary_data) request.add_header('Content-Type', 'application/json') try: response = self.merlin.open(request) pin_attempt = response.read() parsed_pin_attempt = json.loads(pin_attempt) if parsed_pin_attempt["status"].lower( ) == "id_match_positive": print("2FA verification successful.") utc_date = datetime.utcnow() utc_millis_str = str( int(utc_date.timestamp()) * 1000) print("Targetting new session with timestamp: ", utc_millis_str) cookie_data = dict((cookie.name, cookie.value) for cookie in self.cookie_jar) print( 'INFO: [COOKIE] Captured authentication token:' ) print(cookie_data["cztoken"]) self.nest_access_token = parsed_pin_attempt[ 'access_token'] self.initialize_twof_session(utc_millis_str) else: print("Could not verify. Exiting...") exit() except: traceback.print_exc() print("Failed 2FA checks. 
Exiting...") exit() print('INFO: Session initialization complete!') def login(self): """Performs user login to get the website_2 cookie.""" print('INFO: Performing user login...') post_data = {'access_token': self.nest_access_token} post_data = urllib.parse.urlencode(post_data) binary_data = post_data.encode('utf-8') print("INFO: Auth post data") print(post_data) request = urllib.request.Request(self.nest_api_login_url, data=binary_data) request.add_header('Content-Type', 'application/x-www-form-urlencoded') response = self.merlin.open(request) session_data = response.read() print(session_data) def initialize_user(self): """Gets the assets belonging to Nest user.""" print('INFO: Initializing current user...') user_url = self.nest_user_url.replace('#USERID#', self.nest_user_id) print('INFO: Requesting user data from:') print(user_url) binary_data = json.dumps( self.nest_user_request_payload).encode('utf-8') request = urllib.request.Request(user_url, binary_data) request.add_header('Content-Type', 'application/json') request.add_header('Authorization', 'Basic %s' % self.nest_access_token) response = self.merlin.open(request) response_data = response.read() print(response_data) user_object = json.loads(response_data) for bucket in user_object['updated_buckets']: bucket_id = bucket['object_key'] if bucket_id.startswith('quartz.'): camera_id = bucket_id.replace('quartz.', '') print('INFO: Detected camera configuration.') print(bucket) print('INFO: Camera UUID:') print(camera_id) self.nest_camera_array.append(camera_id) def capture_images(self, config=None): """Starts the multi-threaded image capture process.""" print('INFO: Capturing images...') self.is_capturing = True if not os.path.exists('capture'): os.makedirs('capture') self.nest_camera_buffer_threshold = config.threshold for camera in self.nest_camera_array: camera_path = '' video_path = '' # Determine whether the entries should be copied to a custom path # or not. if not config.path: camera_path = os.path.join(self.local_path, 'capture', camera, 'images') video_path = os.path.join(self.local_path, 'capture', camera, 'video') else: camera_path = os.path.join(config.path, 'capture', camera, 'images') video_path = os.path.join(config.path, 'capture', camera, 'video') # Provision the necessary folders for images and videos. 
if not os.path.exists(camera_path): os.makedirs(camera_path) if not os.path.exists(video_path): os.makedirs(video_path) image_thread = threading.Thread(target=self.perform_capture, args=(config, camera, camera_path, video_path)) image_thread.daemon = True image_thread.start() while True: time.sleep(1) def perform_capture(self, config=None, camera=None, camera_path='', video_path=''): """Captures images and generates the video from them.""" camera_buffer = defaultdict(list) while self.is_capturing: file_id = str(uuid.uuid4().hex) utc_date = datetime.utcnow() utc_millis_str = str(int(utc_date.timestamp()) * 1000) print('Applied cache buster: ', utc_millis_str) image_url = self.nest_image_url.replace( '#CAMERAID#', camera).replace('#CBUSTER#', utc_millis_str).replace( '#WIDTH#', str(config.width)) request = urllib.request.Request(image_url) request.add_header('accept', 'image/webp,image/apng,image/*,*/*;q=0.9') request.add_header('accept-encoding', 'gzip, deflate, br') request.add_header( 'user-agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36' ) request.add_header('referer', 'https://home.nest.com/') request.add_header('authority', 'nexusapi-us1.camera.home.nest.com') try: response = self.merlin.open(request) time.sleep(config.capture_delay) with open(camera_path + '/' + file_id + '.jpg', 'wb') as image_file: for chunk in response: image_file.write(chunk) # Check if we need to compile a video if config.produce_video: camera_buffer_size = len(camera_buffer[camera]) print('[', threading.current_thread().name, '] INFO: Camera buffer size for ', camera, ': ', camera_buffer_size) if camera_buffer_size < self.nest_camera_buffer_threshold: camera_buffer[camera].append(file_id) else: camera_image_folder = os.path.join( self.local_path, camera_path) # Build the batch of files that need to be made into a video. file_declaration = '' for buffer_entry in camera_buffer[camera]: file_declaration = file_declaration + 'file \'' + camera_image_folder + '/' + buffer_entry + '.jpg\'\n' concat_file_name = os.path.join( self.temp_dir_path, camera + '.txt') # Make sure that the content is decoded with open(concat_file_name, 'w') as declaration_file: declaration_file.write(file_declaration) # Check if we have ffmpeg locally use_terminal = False ffmpeg_path = '' if shutil.which("ffmpeg"): ffmpeg_path = 'ffmpeg' use_terminal = True else: ffmpeg_path = os.path.abspath( os.path.join(os.path.dirname(__file__), '..', 'tools', 'ffmpeg')) if use_terminal or (os.path.isfile(ffmpeg_path) and use_terminal is False): print('INFO: Found ffmpeg. Processing video!') target_video_path = os.path.join( video_path, file_id + '.mp4') process = Popen([ ffmpeg_path, '-r', str(config.frame_rate), '-f', 'concat', '-safe', '0', '-i', concat_file_name, '-vcodec', 'libx264', '-crf', '25', '-pix_fmt', 'yuv420p', target_video_path ], stdout=PIPE, stderr=PIPE) process.communicate() os.remove(concat_file_name) print('INFO: Video processing is complete!') # Upload the video storage_provider = AzureStorageProvider() if bool(config.upload_to_azure): print('INFO: Uploading to Azure Storage...') target_blob = 'foggycam/' + camera + '/' + file_id + '.mp4' storage_provider.upload_video( account_name=config.az_account_name, sas_token=config.az_sas_token, container='foggycam', blob=target_blob, path=target_video_path) print('INFO: Upload complete.') # If the user specified the need to remove images post-processing # then clear the image folder from images in the buffer. 
if config.clear_images: for buffer_entry in camera_buffer[camera]: deletion_target = os.path.join( camera_path, buffer_entry + '.jpg') print('INFO: Deleting ' + deletion_target) os.remove(deletion_target) else: print( 'WARNING: No ffmpeg detected. Make sure the binary is in /tools.' ) # Empty buffer, since we no longer need the file records that we're planning # to compile in a video. camera_buffer[camera] = [] except urllib.request.HTTPError as err: if err.code == 403: self.initialize_session() self.login() self.initialize_user() except Exception: print('ERROR: Could not download image from URL:') print(image_url) traceback.print_exc()
# CookieJar has several subclasses: FileCookieJar, LWPCookieJar and MozillaCookieJar.
# CookieJar manages the cookies produced by HTTP responses, handles their storage,
# and attaches the stored cookies to subsequent HTTP requests.
from http.cookiejar import CookieJar, LWPCookieJar
from urllib.request import Request, HTTPCookieProcessor, build_opener
from urllib.parse import urlencode
import ssl

ssl._create_default_https_context = ssl._create_unverified_context

# ---------------------------- Fetch cookies
# Create an object that manages cookies.
cookie_obj = CookieJar()
# Create a cookie-aware handler; it is an HTTPCookieProcessor instance.
cookie_handler = HTTPCookieProcessor(cookie_obj)
# quote(url, safe=string.printable)
# build_opener's internal implementation is urlopen.
# urlopen can only perform simple requests; it does not support authentication,
# cookies, proxies or other advanced operations on its own.
opener = build_opener(cookie_handler)
response = opener.open('http://www.neihanshequ.com')
print(response)
for cookie in cookie_obj:
    print('key:', cookie.name)
    print('value:', cookie.value)

# ---------------------------------- Save cookies
filename = 'neihan.txt'  # File in which the cookies will be saved.
cookie_obj = LWPCookieJar(filename=filename)
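# A minimal sketch of completing the save flow above (an assumed continuation, not part
# of the original snippet): LWPCookieJar can write captured cookies to disk and read
# them back later, so a session can be reused across runs. The opener and URL reuse
# below are illustrative only.
save_opener = build_opener(HTTPCookieProcessor(cookie_obj))
save_opener.open('http://www.neihanshequ.com')
# Persist even session cookies by ignoring the discard and expiry flags.
cookie_obj.save(ignore_discard=True, ignore_expires=True)

# On a later run, reload the saved cookies into a fresh jar.
restored = LWPCookieJar(filename=filename)
restored.load(ignore_discard=True, ignore_expires=True)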
class HttpTransport(Transport): """ HTTP transport using urllib2. Provided basic http transport that provides for cookies, proxies but no authentication. """ def __init__(self, **kwargs): """ @param kwargs: Keyword arguments. - B{proxy} - An http proxy to be specified on requests. The proxy is defined as {protocol:proxy,} - type: I{dict} - default: {} - B{timeout} - Set the url open timeout (seconds). - type: I{float} - default: 90 """ Transport.__init__(self) Unskin(self.options).update(kwargs) self.cookiejar = CookieJar() self.proxy = {} self.urlopener = None def open(self, request): try: url = request.url headers = request.headers log.debug('opening (%s)', url) u2request = u2.Request(url, None, headers) self.proxy = self.options.proxy return self.u2open(u2request) except HTTPError as e: raise TransportError(str(e), e.code, e.fp) def send(self, request): result = None url = request.url msg = request.message headers = request.headers try: u2request = u2.Request(url, msg, headers) self.addcookies(u2request) self.proxy = self.options.proxy request.headers.update(u2request.headers) log.debug('sending:\n%s', request) fp = self.u2open(u2request) self.getcookies(fp, u2request) result = Reply(200, fp.headers, fp.read()) log.debug('received:\n%s', result) except HTTPError as e: if e.code in (202, 204): result = None else: raise TransportError(e.msg, e.code, e.fp) return result def addcookies(self, u2request): """ Add cookies in the cookiejar to the request. @param u2request: A urllib2 request. @rtype: u2request: urllib2.Requet. """ self.cookiejar.add_cookie_header(u2request) def getcookies(self, fp, u2request): """ Add cookies in the request to the cookiejar. @param u2request: A urllib2 request. @rtype: u2request: urllib2.Requet. """ self.cookiejar.extract_cookies(fp, u2request) def u2open(self, u2request): """ Open a connection. @param u2request: A urllib2 request. @type u2request: urllib2.Requet. @return: The opened file-like urllib2 object. @rtype: fp """ tm = self.options.timeout url = self.u2opener() if self.u2ver() < 2.6: socket.setdefaulttimeout(tm) return url.open(u2request) else: return url.open(u2request, timeout=tm) def u2opener(self): """ Create a urllib opener. @return: An opener. @rtype: I{OpenerDirector} """ if self.urlopener is None: return u2.build_opener(*self.u2handlers()) else: return self.urlopener def u2handlers(self): """ Get a collection of urllib handlers. @return: A list of handlers to be installed in the opener. @rtype: [Handler,...] """ handlers = [] handlers.append(u2.ProxyHandler(self.proxy)) return handlers def u2ver(self): """ Get the major/minor version of the urllib2 lib. @return: The urllib2 version. @rtype: float """ try: part = u2.__version__.split('.', 1) n = float('.'.join(part)) return n except Exception as e: log.exception(e) return 0 def __deepcopy__(self, memo={}): clone = self.__class__() p = Unskin(self.options) cp = Unskin(clone.options) cp.update(p) return clone
class QuartClient: http_connection_class: Type[TestHTTPConnectionProtocol] websocket_connection_class: Type[TestWebsocketConnectionProtocol] http_connection_class = TestHTTPConnection websocket_connection_class = TestWebsocketConnection def __init__(self, app: "Quart", use_cookies: bool = True) -> None: self.app = app self.cookie_jar: Optional[CookieJar] if use_cookies: self.cookie_jar = CookieJar() else: self.cookie_jar = None self.preserve_context = False self.push_promises: List[Tuple[str, Headers]] = [] async def open( self, path: str, *, method: str = "GET", headers: Optional[Union[dict, Headers]] = None, data: Optional[AnyStr] = None, form: Optional[dict] = None, query_string: Optional[dict] = None, json: Any = sentinel, scheme: str = "http", follow_redirects: bool = False, root_path: str = "", http_version: str = "1.1", ) -> Response: self.push_promises = [] response = await self._make_request(path, method, headers, data, form, query_string, json, scheme, root_path, http_version) if follow_redirects: while response.status_code >= 300 and response.status_code <= 399: # Most browsers respond to an HTTP 302 with a GET request to the new location, # despite what the HTTP spec says. HTTP 303 should always be responded to with # a GET request. if response.status_code == 302 or response.status_code == 303: method = "GET" response = await self._make_request( response.location, method, headers, data, form, query_string, json, scheme, root_path, http_version, ) if self.preserve_context: _request_ctx_stack.push(self.app._preserved_context) return response def request( self, path: str, *, method: str = "GET", headers: Optional[Union[dict, Headers]] = None, query_string: Optional[dict] = None, scheme: str = "http", root_path: str = "", http_version: str = "1.1", ) -> TestHTTPConnectionProtocol: headers, path, query_string_bytes = make_test_headers_path_and_query_string( self.app, path, headers, query_string) scope = self._build_scope("http", path, method, headers, query_string_bytes, scheme, root_path, http_version) return self.http_connection_class( self.app, scope, _preserve_context=self.preserve_context) def websocket( self, path: str, *, headers: Optional[Union[dict, Headers]] = None, query_string: Optional[dict] = None, scheme: str = "ws", subprotocols: Optional[List[str]] = None, root_path: str = "", http_version: str = "1.1", ) -> TestWebsocketConnectionProtocol: headers, path, query_string_bytes = make_test_headers_path_and_query_string( self.app, path, headers, query_string) scope = self._build_scope("websocket", path, "GET", headers, query_string_bytes, scheme, root_path, http_version) return self.websocket_connection_class(self.app, scope) async def delete(self, *args: Any, **kwargs: Any) -> Response: """Make a DELETE request. See :meth:`~quart.testing.QuartClient.open` for argument details. """ return await self.open(*args, method="DELETE", **kwargs) async def get(self, *args: Any, **kwargs: Any) -> Response: """Make a GET request. See :meth:`~quart.testing.QuartClient.open` for argument details. """ return await self.open(*args, method="GET", **kwargs) async def head(self, *args: Any, **kwargs: Any) -> Response: """Make a HEAD request. See :meth:`~quart.testing.QuartClient.open` for argument details. """ return await self.open(*args, method="HEAD", **kwargs) async def options(self, *args: Any, **kwargs: Any) -> Response: """Make a OPTIONS request. See :meth:`~quart.testing.QuartClient.open` for argument details. 
""" return await self.open(*args, method="OPTIONS", **kwargs) async def patch(self, *args: Any, **kwargs: Any) -> Response: """Make a PATCH request. See :meth:`~quart.testing.QuartClient.open` for argument details. """ return await self.open(*args, method="PATCH", **kwargs) async def post(self, *args: Any, **kwargs: Any) -> Response: """Make a POST request. See :meth:`~quart.testing.QuartClient.open` for argument details. """ return await self.open(*args, method="POST", **kwargs) async def put(self, *args: Any, **kwargs: Any) -> Response: """Make a PUT request. See :meth:`~quart.testing.QuartClient.open` for argument details. """ return await self.open(*args, method="PUT", **kwargs) async def trace(self, *args: Any, **kwargs: Any) -> Response: """Make a TRACE request. See :meth:`~quart.testing.QuartClient.open` for argument details. """ return await self.open(*args, method="TRACE", **kwargs) def set_cookie( self, server_name: str, key: str, value: str = "", max_age: Optional[Union[int, timedelta]] = None, expires: Optional[Union[int, float, datetime]] = None, path: str = "/", domain: Optional[str] = None, secure: bool = False, httponly: bool = False, samesite: str = None, charset: str = "utf-8", ) -> None: """Set a cookie in the cookie jar. The arguments are the standard cookie morsels and this is a wrapper around the stdlib SimpleCookie code. """ cookie = dump_cookie( # type: ignore key, value=value, max_age=max_age, expires=expires, path=path, domain=domain, secure=secure, httponly=httponly, charset=charset, samesite=samesite, ) self.cookie_jar.extract_cookies( _TestCookieJarResponse(Headers([("set-cookie", cookie) ])), # type: ignore U2Request(f"http://{server_name}{path}"), ) def delete_cookie(self, server_name: str, key: str, path: str = "/", domain: Optional[str] = None) -> None: """Delete a cookie (set to expire immediately).""" self.set_cookie(server_name, key, expires=0, max_age=0, path=path, domain=domain) @asynccontextmanager async def session_transaction( self, path: str = "/", *, method: str = "GET", headers: Optional[Union[dict, Headers]] = None, query_string: Optional[dict] = None, scheme: str = "http", data: Optional[AnyStr] = None, form: Optional[dict] = None, json: Any = sentinel, root_path: str = "", http_version: str = "1.1", ) -> AsyncGenerator[Session, None]: if self.cookie_jar is None: raise RuntimeError( "Session transactions only make sense with cookies enabled.") if headers is None: headers = Headers() elif isinstance(headers, Headers): headers = headers elif headers is not None: headers = Headers(headers) for cookie in self.cookie_jar: headers.add("cookie", f"{cookie.name}={cookie.value}") original_request_ctx = _request_ctx_stack.top async with self.app.test_request_context( path, method=method, headers=headers, query_string=query_string, scheme=scheme, data=data, form=form, json=json, root_path=root_path, http_version=http_version, ) as ctx: session_interface = self.app.session_interface session = await session_interface.open_session( self.app, ctx.request) if session is None: raise RuntimeError( "Error opening the sesion. 
Check the secret_key?") _request_ctx_stack.push(original_request_ctx) try: yield session finally: _request_ctx_stack.pop() response = self.app.response_class(b"") if not session_interface.is_null_session(session): await session_interface.save_session(self.app, session, response) self.cookie_jar.extract_cookies( _TestCookieJarResponse(response.headers), # type: ignore U2Request(ctx.request.url), ) async def __aenter__(self) -> "QuartClient": if self.preserve_context: raise RuntimeError("Cannot nest client invocations") self.preserve_context = True return self async def __aexit__(self, exc_type: type, exc_value: BaseException, tb: TracebackType) -> None: self.preserve_context = False while True: top = _request_ctx_stack.top if top is not None and top.preserved: await top.pop(None) else: break async def _make_request( self, path: str, method: str, headers: Optional[Union[dict, Headers]], data: Optional[AnyStr], form: Optional[dict], query_string: Optional[dict], json: Any, scheme: str, root_path: str, http_version: str, ) -> Response: headers, path, query_string_bytes = make_test_headers_path_and_query_string( self.app, path, headers, query_string) request_data, body_headers = make_test_body_with_headers( data, form, json, self.app) headers.update(**body_headers) # type: ignore if self.cookie_jar is not None: for cookie in self.cookie_jar: headers.add("cookie", f"{cookie.name}={cookie.value}") scope = self._build_scope("http", path, method, headers, query_string_bytes, scheme, root_path, http_version) async with self.http_connection_class( self.app, scope, _preserve_context=self.preserve_context) as connection: await connection.send(request_data) await connection.send_complete() response = await connection.as_response() if self.cookie_jar is not None: self.cookie_jar.extract_cookies( _TestCookieJarResponse(response.headers), # type: ignore U2Request(f"{scheme}://{headers['host']}{path}"), ) self.push_promises.extend(connection.push_promises) return response @overload def _build_scope( self, type_: Literal["http"], path: str, method: str, headers: Headers, query_string: bytes, scheme: str, root_path: str, http_version: str, ) -> HTTPScope: ... @overload def _build_scope( self, type_: Literal["websocket"], path: str, method: str, headers: Headers, query_string: bytes, scheme: str, root_path: str, http_version: str, ) -> WebsocketScope: ... def _build_scope( self, type_: str, path: str, method: str, headers: Headers, query_string: bytes, scheme: str, root_path: str, http_version: str, ) -> Scope: scope = { "type": type_, "http_version": http_version, "asgi": { "spec_version": "2.1" }, "method": method, "scheme": scheme, "path": path, "raw_path": path.encode("ascii"), "query_string": query_string, "root_path": root_path, "headers": encode_headers(headers), "extensions": {}, "_quart._preserve_context": self.preserve_context, } if type_ == "http" and http_version in {"2", "3"}: scope["extensions"] = {"http.response.push": {}} elif type_ == "websocket": scope["extensions"] = {"websocket.http.response": {}} return cast(Scope, scope)
class QuartClient: """A Client bound to an app for testing. This should be used to make requests and receive responses from the app for testing purposes. This is best used via :attr:`~quart.app.Quart.test_client` method. """ def __init__(self, app: "Quart", use_cookies: bool = True) -> None: self.cookie_jar: Optional[CookieJar] if use_cookies: self.cookie_jar = CookieJar() else: self.cookie_jar = None self.app = app self.push_promises: List[Tuple[str, Headers]] = [] async def open( self, path: str, *, method: str = "GET", headers: Optional[Union[dict, Headers]] = None, data: Optional[AnyStr] = None, form: Optional[dict] = None, query_string: Optional[dict] = None, json: Any = sentinel, scheme: str = "http", follow_redirects: bool = False, root_path: str = "", http_version: str = "1.1", ) -> Response: """Open a request to the app associated with this client. Arguments: path: The path to request. If the query_string argument is not defined this argument will be partitioned on a '?' with the following part being considered the query_string. method: The method to make the request with, defaults to 'GET'. headers: Headers to include in the request. data: Raw data to send in the request body. form: Data to send form encoded in the request body. query_string: To send as a dictionary, alternatively the query_string can be determined from the path. json: Data to send json encoded in the request body. scheme: The scheme to use in the request, default http. follow_redirects: Whether or not a redirect response should be followed, defaults to False. Returns: The response from the app handling the request. """ response = await self._make_request( path, method, headers, data, form, query_string, json, scheme, root_path, http_version ) if follow_redirects: while response.status_code >= 300 and response.status_code <= 399: # Most browsers respond to an HTTP 302 with a GET request to the new location, # despite what the HTTP spec says. HTTP 303 should always be responded to with # a GET request. 
if response.status_code == 302 or response.status_code == 303: method = "GET" response = await self._make_request( response.location, method, headers, data, form, query_string, json, scheme, root_path, http_version, ) return response async def _make_request( self, path: str, method: str = "GET", headers: Optional[Union[dict, Headers]] = None, data: Optional[AnyStr] = None, form: Optional[dict] = None, query_string: Optional[dict] = None, json: Any = sentinel, scheme: str = "http", root_path: str = "", http_version: str = "1.1", ) -> Response: headers, path, query_string_bytes = make_test_headers_path_and_query_string( self.app, path, headers, query_string ) request_data, body_headers = make_test_body_with_headers(data, form, json, self.app) # Replace with headers.update(**body_headers) when Werkzeug # supports https://github.com/pallets/werkzeug/pull/1687 for key, value in body_headers.items(): headers[key] = value if self.cookie_jar is not None: for cookie in self.cookie_jar: headers.add("cookie", f"{cookie.name}={cookie.value}") request = self.app.request_class( method, scheme, path, query_string_bytes, headers, root_path, http_version, send_push_promise=self._send_push_promise, ) request.body.set_result(request_data) response = await self._handle_request(request) if self.cookie_jar is not None: self.cookie_jar.extract_cookies( _TestCookieJarResponse(response.headers), # type: ignore U2Request(request.url), ) return response async def _handle_request(self, request: Request) -> Response: return await asyncio.ensure_future(self.app.handle_request(request)) async def _send_push_promise(self, path: str, headers: Headers) -> None: self.push_promises.append((path, headers)) async def delete(self, *args: Any, **kwargs: Any) -> Response: """Make a DELETE request. See :meth:`~quart.testing.QuartClient.open` for argument details. """ return await self.open(*args, method="DELETE", **kwargs) async def get(self, *args: Any, **kwargs: Any) -> Response: """Make a GET request. See :meth:`~quart.testing.QuartClient.open` for argument details. """ return await self.open(*args, method="GET", **kwargs) async def head(self, *args: Any, **kwargs: Any) -> Response: """Make a HEAD request. See :meth:`~quart.testing.QuartClient.open` for argument details. """ return await self.open(*args, method="HEAD", **kwargs) async def options(self, *args: Any, **kwargs: Any) -> Response: """Make a OPTIONS request. See :meth:`~quart.testing.QuartClient.open` for argument details. """ return await self.open(*args, method="OPTIONS", **kwargs) async def patch(self, *args: Any, **kwargs: Any) -> Response: """Make a PATCH request. See :meth:`~quart.testing.QuartClient.open` for argument details. """ return await self.open(*args, method="PATCH", **kwargs) async def post(self, *args: Any, **kwargs: Any) -> Response: """Make a POST request. See :meth:`~quart.testing.QuartClient.open` for argument details. """ return await self.open(*args, method="POST", **kwargs) async def put(self, *args: Any, **kwargs: Any) -> Response: """Make a PUT request. See :meth:`~quart.testing.QuartClient.open` for argument details. """ return await self.open(*args, method="PUT", **kwargs) async def trace(self, *args: Any, **kwargs: Any) -> Response: """Make a TRACE request. See :meth:`~quart.testing.QuartClient.open` for argument details. 
""" return await self.open(*args, method="TRACE", **kwargs) def set_cookie( self, server_name: str, key: str, value: str = "", max_age: Optional[Union[int, timedelta]] = None, expires: Optional[Union[int, float, datetime]] = None, path: str = "/", domain: Optional[str] = None, secure: bool = False, httponly: bool = False, samesite: str = None, charset: str = "utf-8", ) -> None: """Set a cookie in the cookie jar. The arguments are the standard cookie morsels and this is a wrapper around the stdlib SimpleCookie code. """ cookie = dump_cookie( # type: ignore key, value=value, max_age=max_age, expires=expires, path=path, domain=domain, secure=secure, httponly=httponly, charset=charset, samesite=samesite, ) self.cookie_jar.extract_cookies( _TestCookieJarResponse(Headers([("set-cookie", cookie)])), # type: ignore U2Request(f"http://{server_name}{path}"), ) def delete_cookie( self, server_name: str, key: str, path: str = "/", domain: Optional[str] = None ) -> None: """Delete a cookie (set to expire immediately).""" self.set_cookie(server_name, key, expires=0, max_age=0, path=path, domain=domain) @asynccontextmanager async def websocket( self, path: str, *, headers: Optional[Union[dict, Headers]] = None, query_string: Optional[dict] = None, scheme: str = "ws", subprotocols: Optional[List[str]] = None, root_path: str = "", http_version: str = "1.1", ) -> AsyncGenerator[_TestingWebsocket, None]: headers, path, query_string_bytes = make_test_headers_path_and_query_string( self.app, path, headers, query_string ) queue: asyncio.Queue = asyncio.Queue() websocket_client = _TestingWebsocket(queue) subprotocols = subprotocols or [] websocket = self.app.websocket_class( path, query_string_bytes, scheme, headers, root_path, http_version, subprotocols, queue.get, websocket_client.local_queue.put, websocket_client.accept, ) adapter = self.app.create_url_adapter(websocket) try: adapter.match() except WBadRequest: raise BadRequest() websocket_client.task = asyncio.ensure_future(self.app.handle_websocket(websocket)) try: yield websocket_client finally: websocket_client.task.cancel()
def set_api_key(self, api_key):
    if api_key == 'YOUR API KEY':
        api_key = None
    self.api_key = api_key
    self.jar = CookieJar()
class Cookies(MutableMapping): """ HTTP Cookies, as a mutable mapping. """ def __init__(self, cookies: CookieTypes = None) -> None: if cookies is None or isinstance(cookies, dict): self.jar = CookieJar() if isinstance(cookies, dict): for key, value in cookies.items(): self.set(key, value) elif isinstance(cookies, list): self.jar = CookieJar() for key, value in cookies: self.set(key, value) elif isinstance(cookies, Cookies): self.jar = CookieJar() for cookie in cookies.jar: self.jar.set_cookie(cookie) else: self.jar = cookies def extract_cookies(self, response: Response) -> None: """ Loads any cookies based on the response `Set-Cookie` headers. """ urlib_response = self._CookieCompatResponse(response) urllib_request = self._CookieCompatRequest(response.request) self.jar.extract_cookies(urlib_response, urllib_request) # type: ignore def set_cookie_header(self, request: Request) -> None: """ Sets an appropriate 'Cookie:' HTTP header on the `Request`. """ urllib_request = self._CookieCompatRequest(request) self.jar.add_cookie_header(urllib_request) def set(self, name: str, value: str, domain: str = "", path: str = "/") -> None: """ Set a cookie value by name. May optionally include domain and path. """ kwargs = { "version": 0, "name": name, "value": value, "port": None, "port_specified": False, "domain": domain, "domain_specified": bool(domain), "domain_initial_dot": domain.startswith("."), "path": path, "path_specified": bool(path), "secure": False, "expires": None, "discard": True, "comment": None, "comment_url": None, "rest": {"HttpOnly": None}, "rfc2109": False, } cookie = Cookie(**kwargs) # type: ignore self.jar.set_cookie(cookie) def get( # type: ignore self, name: str, default: str = None, domain: str = None, path: str = None ) -> typing.Optional[str]: """ Get a cookie by name. May optionally include domain and path in order to specify exactly which cookie to retrieve. """ value = None for cookie in self.jar: if cookie.name == name: if domain is None or cookie.domain == domain: if path is None or cookie.path == path: if value is not None: message = f"Multiple cookies exist with name={name}" raise CookieConflict(message) value = cookie.value if value is None: return default return value def delete(self, name: str, domain: str = None, path: str = None) -> None: """ Delete a cookie by name. May optionally include domain and path in order to specify exactly which cookie to delete. """ if domain is not None and path is not None: return self.jar.clear(domain, path, name) remove = [] for cookie in self.jar: if cookie.name == name: if domain is None or cookie.domain == domain: if path is None or cookie.path == path: remove.append(cookie) for cookie in remove: self.jar.clear(cookie.domain, cookie.path, cookie.name) def clear(self, domain: str = None, path: str = None) -> None: """ Delete all cookies. Optionally include a domain and path in order to only delete a subset of all the cookies. 
""" args = [] if domain is not None: args.append(domain) if path is not None: assert domain is not None args.append(path) self.jar.clear(*args) def update(self, cookies: CookieTypes = None) -> None: # type: ignore cookies = Cookies(cookies) for cookie in cookies.jar: self.jar.set_cookie(cookie) def __setitem__(self, name: str, value: str) -> None: return self.set(name, value) def __getitem__(self, name: str) -> str: value = self.get(name) if value is None: raise KeyError(name) return value def __delitem__(self, name: str) -> None: return self.delete(name) def __len__(self) -> int: return len(self.jar) def __iter__(self) -> typing.Iterator[str]: return (cookie.name for cookie in self.jar) def __bool__(self) -> bool: for _ in self.jar: return True return False class _CookieCompatRequest(urllib.request.Request): """ Wraps a `Request` instance up in a compatibility interface suitable for use with `CookieJar` operations. """ def __init__(self, request: Request) -> None: super().__init__( url=str(request.url), headers=dict(request.headers), method=request.method, ) self.request = request def add_unredirected_header(self, key: str, value: str) -> None: super().add_unredirected_header(key, value) self.request.headers[key] = value class _CookieCompatResponse: """ Wraps a `Request` instance up in a compatibility interface suitable for use with `CookieJar` operations. """ def __init__(self, response: Response): self.response = response def info(self) -> email.message.Message: info = email.message.Message() for key, value in self.response.headers.multi_items(): # Note that setting `info[key]` here is an "append" operation, # not a "replace" operation. # https://docs.python.org/3/library/email.compat32-message.html#email.message.Message.__setitem__ info[key] = value return info
class HttpTransport(Transport): """ Basic HTTP transport implemented using using urllib2, that provides for cookies & proxies but no authentication. """ def __init__(self, **kwargs): """ @param kwargs: Keyword arguments. - B{proxy} - An HTTP proxy to be specified on requests. The proxy is defined as {protocol:proxy,} - type: I{dict} - default: {} - B{timeout} - Set the URL open timeout (seconds). - type: I{float} - default: 90 """ Transport.__init__(self) Unskin(self.options).update(kwargs) self.cookiejar = CookieJar() self.proxy = {} self.urlopener = None def open(self, request): try: url = self.__get_request_url_for_urllib(request) log.debug('opening (%s)', url) u2request = urllib.request.Request(url) self.proxy = self.options.proxy return self.u2open(u2request) except urllib.error.HTTPError as e: raise TransportError(str(e), e.code, e.fp) def send(self, request): url = self.__get_request_url_for_urllib(request) msg = request.message headers = request.headers try: u2request = urllib.request.Request(url, msg, headers) self.addcookies(u2request) self.proxy = self.options.proxy request.headers.update(u2request.headers) log.debug('sending:\n%s', request) fp = self.u2open(u2request) self.getcookies(fp, u2request) headers = fp.headers if sys.version_info < (3, 0): headers = headers.dict reply = Reply(http.client.OK, headers, fp.read()) log.debug('received:\n%s', reply) return reply except urllib.error.HTTPError as e: if e.code not in (http.client.ACCEPTED, http.client.NO_CONTENT): raise TransportError(e.msg, e.code, e.fp) def addcookies(self, u2request): """ Add cookies in the cookiejar to the request. @param u2request: A urllib2 request. @rtype: u2request: urllib2.Request. """ self.cookiejar.add_cookie_header(u2request) def getcookies(self, fp, u2request): """ Add cookies in the request to the cookiejar. @param u2request: A urllib2 request. @rtype: u2request: urllib2.Request. """ self.cookiejar.extract_cookies(fp, u2request) def u2open(self, u2request): """ Open a connection. @param u2request: A urllib2 request. @type u2request: urllib2.Request. @return: The opened file-like urllib2 object. @rtype: fp """ tm = self.options.timeout url = self.u2opener() if (sys.version_info < (3, 0)) and (self.u2ver() < 2.6): socket.setdefaulttimeout(tm) return url.open(u2request) return url.open(u2request, timeout=tm) def u2opener(self): """ Create a urllib opener. @return: An opener. @rtype: I{OpenerDirector} """ if self.urlopener is None: return urllib.request.build_opener(*self.u2handlers()) return self.urlopener def u2handlers(self): """ Get a collection of urllib handlers. @return: A list of handlers to be installed in the opener. @rtype: [Handler,...] """ return [urllib.request.ProxyHandler(self.proxy)] def u2ver(self): """ Get the major/minor version of the urllib2 lib. @return: The urllib2 version. @rtype: float """ try: part = urllib2.__version__.split('.', 1) return float('.'.join(part)) except Exception as e: log.exception(e) return 0 def __deepcopy__(self, memo={}): clone = self.__class__() p = Unskin(self.options) cp = Unskin(clone.options) cp.update(p) return clone @staticmethod def __get_request_url_for_urllib(request): """ Returns the given request's URL, properly encoded for use with urllib. We expect that the given request object already verified that the URL contains ASCII characters only and stored it as a native str value. urllib accepts URL information as a native str value and may break unexpectedly if given URL information in another format. 
        Python 3.x's http.client implementation must be given a unicode string
        rather than a bytes object; the given string is converted internally to
        a bytes object using an explicitly specified ASCII encoding. Python
        2.7's httplib implementation expects the URL passed to it to not be a
        unicode string; if it is, the underlying httplib Request object will
        forcefully convert all of its data to unicode, assuming that data is
        ASCII-only and raising a UnicodeDecodeError if it is not (caused by
        simple unicode + str concatenation). Python 2.4's httplib
        implementation is unaffected, since it lacks the internal optimization
        present in the Python 2.7 implementation that converts all request
        data to unicode.

        """
        assert isinstance(request.url, str)
        return request.url
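# A minimal sketch (hypothetical helper, not part of the original transport) of the
# invariant described above: hand urllib a native, ASCII-only str URL.
def _as_native_ascii_url(url):
    if isinstance(url, bytes):
        url = url.decode("ascii")  # rejects non-ASCII bytes early
    url.encode("ascii")  # raises UnicodeEncodeError if the text is not ASCII-only
    return url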
import re
import threading
import traceback
from http.cookiejar import CookieJar

from bs4 import BeautifulSoup
from utility.connections import ExtSSHConnection
try:
    # Python 2.x
    import urllib2
    from urllib import urlretrieve
except ImportError:
    # Python 3.x
    from urllib import request as urllib2
    from urllib.request import urlretrieve
from utility import BackgroundTask
from config import user_agent, XML_decoder, request_timeout

# Install a cookie-aware opener as the default used by urlopen().
urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor(CookieJar())))

pac_list = ("google", "xhamster.com", "t66y.com")


def PAC_list(url: str) -> bool:
    # True if the URL's host part contains any of the keywords above.
    return any(
        any(host.find(keyword) >= 0 for keyword in pac_list)
        for host in re.findall("^(https://.*?/|http://.*?/)", url)
    )


class URLReadThread(threading.Thread):
    def __init__(self, URL=""):
        super(URLReadThread, self).__init__()
def _on_log_in(self, _widget): import gi gi.require_version('WebKit2', '4.0') from gi.repository import Gtk, WebKit2, Soup loop = GLib.MainLoop() window = Gtk.Window() window.set_title("Log in to GMail") window.set_default_size(800, 600) window.set_destroy_with_parent(True) window.set_transient_for(self._window) window.set_modal(True) scrolled_window = Gtk.ScrolledWindow() window.add(scrolled_window) webview = WebKit2.WebView() webview.load_uri(GMAIL_URL) scrolled_window.add(webview) window.connect("destroy", lambda _: loop.quit()) got_to_inbox = False def _on_load_changed(_, load_event): if load_event != WebKit2.LoadEvent.FINISHED: return uri = webview.get_uri() nonlocal got_to_inbox if uri == "https://mail.google.com/mail/u/0/#inbox": nonlocal got_to_inbox got_to_inbox = True window.destroy() webview.connect("load-changed", _on_load_changed) window.show_all() loop.run() cookie_headers = None def _got_cookies(cookies, result, data): try: headers = [] for cookie in cookies.get_cookies_finish(result): headers.append(cookie.to_set_cookie_header()) nonlocal cookie_headers cookie_headers = headers except: logging.warning("getting cookies failed", exc_info=True) loop.quit() webview.get_context().get_cookie_manager().get_cookies( GMAIL_RSS_URL, None, _got_cookies, None) loop.run() window.destroy() if not cookie_headers: logging.warning("could not set cookies from login") return if not got_to_inbox: logging.warning("login dialog closed before login done") return cookie_jar = CookieJar() cookie_jar.extract_cookies(FakeHTTPResonse(cookie_headers), urllib.request.Request(GMAIL_RSS_URL)) self._set_gmail_credentials(generate_gmail_cookies_key(), cookie_jar) self._on_entry_changed(None)
import time
import unittest
from http.cookiejar import CookieJar
from os import walk
from urllib.request import urlopen

from selenium import webdriver

# Collect the file names already downloaded into the target folder.
f = []
mypath = r"C:\\Users\\lanka\\Desktop\\Lab\\DARPA\\Scrape_Web\\Updated_Downloads\\Journal of Business Research"
for (dirpath, dirnames, filenames) in walk(mypath):
    f.extend(filenames)
    break
# print(f[0])
print("downloaded folder has", len(f))

cj = CookieJar()
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                         'AppleWebKit/537.36 (KHTML, like Gecko) '
                         'Chrome/80.0.3987.132 Safari/537.36'}

pdf_urls = []
issue_urls_unrefined = []
issue_urls = []
start_vol = 62
end_vol = 105
for i in range(start_vol + 1, end_vol + 1):
    # print(i)
    issue_urls_unrefined.append(
        "https://www.sciencedirect.com/journal/journal-of-business-research/vol/{0}/suppl/C".format(i))
for ii in range(start_vol, end_vol + 1):
    # print(i)
    for j in range(1, 13):
def __init__( self, *, headers: dict = None, method: str = "GET", name: str, off_cmd: str, off_data: Union[Mapping, str] = None, on_cmd: str, on_data: Union[Mapping, str] = None, state_cmd: str = None, state_data: Union[Mapping, str] = None, state_method: str = "GET", state_response_off: str = None, state_response_on: str = None, password: str = None, port: int, user: str = None, ) -> None: """Initialize a SimpleHTTPPlugin instance. Keyword Args: headers: Additional headers for both `on()` and `off()` method: HTTP method to be used for both `on()` and `off()` name: Name of the device off_cmd: URL to be called when turning device off off_data: Optional POST data to turn device off on_cmd: URL to be called when turning device on on_data: Optional POST data to turn device on state_cmd: URL to be called to determine device state state_data: Optional POST data to query device state state_method: HTTP method to be used for `get_state()` state_response_off: If this string is in the response to state_cmd, the device is off. state_response_on: If this string is in the response to state_cmd, the device is on. password: Password for HTTP authentication (basic or digest only) port: Port that this device will run on user: Username for HTTP authentication (basic or digest only) """ self.method = method self.state_method = state_method self.headers = headers or {} self.on_cmd = on_cmd self.off_cmd = off_cmd self.state_cmd = state_cmd self.on_data = self._to_bytes(on_data) self.off_data = self._to_bytes(off_data) self.state_data = self._to_bytes(state_data) self.state_response_on = state_response_on self.state_response_off = state_response_off if user and password: manager = urllib.request.HTTPPasswordMgrWithDefaultRealm() manager.add_password(None, (on_cmd, off_cmd), user, password) jar = CookieJar() cookie_handler = urllib.request.HTTPCookieProcessor(jar) basic_handler = urllib.request.HTTPBasicAuthHandler(manager) digest_handler = urllib.request.HTTPDigestAuthHandler(manager) opener = urllib.request.build_opener(basic_handler, digest_handler, cookie_handler) self.urlopen = opener.open else: self.urlopen = urllib.request.urlopen super().__init__(name=name, port=port)
def next_page_of_friends(screen_name, next_cursor_id=None): """ Raises urllib.error.HTTPError if the user is private or their screen name has changed """ request_url = f"https://mobile.twitter.com/{screen_name}/following" if next_cursor_id: request_url += f"?cursor={next_cursor_id}" cookie_jar = CookieJar() opener = urllib.request.build_opener( urllib.request.HTTPCookieProcessor(cookie_jar)) headers = [('Host', "twitter.com"), ('User-Agent', "Mozilla/5.0 (Windows NT 6.1; Win64; x64)"), ('Accept', "application/json, text/javascript, */*; q=0.01"), ('Accept-Language', "de,en-US;q=0.7,en;q=0.3"), ('X-Requested-With', "XMLHttpRequest"), ('Referer', request_url), ('Connection', "keep-alive")] opener.addheaders = headers #print(type(opener)) #> <class 'urllib.request.OpenerDirector'> try: response = opener.open(request_url) #print(type(response)) #> <class 'http.client.HTTPResponse'> except urllib.error.HTTPError as err: # consider allowing error to bubble up and be handled at the worker level (friend_collector.py) if VERBOSE_SCRAPER: print("FRIENDS PAGE NOT FOUND:", screen_name.upper()) breakpoint() return [], None response_body = response.read() #print(type(response_body)) #> bytes soup = BeautifulSoup(response_body.decode(), "html.parser") #print(type(soup)) #> <class 'bs4.BeautifulSoup'> #print(soup.prettify()) # # <span class="count">262</span> # #friends_count = int(soup.find("span", "count").text) #print("FRIENDS COUNT (TOTAL / EXPECTED):", friends_count) # # <form action="/i/guest/follow/SCREEN_NAME_X" method="post"> # <span class="m2-auth-token"> # <input name="authenticity_token" type="hidden" value="..."/> # </span> # <span class="w-button-common w-button-follow"> # <input alt="..." src="https://ma.twimg.com/twitter-mobile/.../images/sprites/followplus.gif" type="image"/> # </span> # </form> # forms = soup.find_all("form") substr = "/i/guest/follow/" friend_names = [ f.attrs["action"].replace(substr, "") for f in forms if substr in f.attrs["action"] ] if VERBOSE_SCRAPER: print("FRIENDS PAGE:", len(friend_names)) #> 20 try: # # <div class="w-button-more"> # <a href="/SCREEN_NAME/following?cursor=CURSOR_ID">...</a> # </div> # next_link = soup.find("div", "w-button-more").find("a") next_cursor_id = next_link.attrs["href"].split( "/following?cursor=")[-1] #print("NEXT CURSOR ID:", next_cursor_id) except AttributeError as err: # handle AttributeError: 'NoneType' object has no attribute 'find' # because the last page doesn't have a next page or corresponding "w-button-more" next_cursor_id = None return friend_names, next_cursor_id
def create_new(self): self._error = None if not (EMAIL and PASSWORD): self._error = 'Incorrect login data' logger.error(self._error) return False login_data = { "act": "users", "type": "login", "mail": EMAIL, "pass": PASSWORD, "need_captcha": "", "captcha": "", "rem": 1 } url = SITE_URL + '/ajaxik.php?' params = parse.urlencode(login_data).encode('utf-8') cjar = CookieJar() if proxy['enable']: opener = request.build_opener( request.ProxyHandler(proxy['proxy_urls']), request.HTTPCookieProcessor(cjar)) logger.debug('proxy used') else: opener = request.build_opener(request.HTTPCookieProcessor(cjar)) try: response = opener.open(url, params).read().decode('utf-8') except Exception as e: self._error = 'Connection failed' logger.error('%s %s', self._error, e) return False result = json.loads(response) if 'error' in result: self._error = 'Incorrect login data' elif 'need_captcha' in result: self._error = 'Captcha requested' else: for cookie in cjar: if cookie.name == 'lf_session': self.time = datetime.now() self.token = cookie.value logger.info('%s %s', self.token, self.time) return True else: self._error = 'Token problem' logger.error(self._error) return False
class HttpTransport(Transport): """ HTTP transport using urllib2. Provided basic http transport that provides for cookies, proxies but no authentication. """ def __init__(self, **kwargs): """ @param kwargs: Keyword arguments. - B{proxy} - An http proxy to be specified on requests. The proxy is defined as {protocol:proxy,} - type: I{dict} - default: {} - B{timeout} - Set the url open timeout (seconds). - type: I{float} - default: 90 """ Transport.__init__(self) Unskin(self.options).update(kwargs) self.cookiejar = CookieJar() self.proxy = {} self.urlopener = None def open(self, request): try: url = request.url log.debug('opening (%s)', url) u2request = u2.Request(url) self.proxy = self.options.proxy return self.u2open(u2request) except HTTPError as e: raise TransportError(str(e), e.code, e.fp) def send(self, request): result = None url = request.url msg = request.message headers = request.headers try: u2request = u2.Request(url, msg, headers) self.addcookies(u2request) self.proxy = self.options.proxy request.headers.update(u2request.headers) log.debug('sending:\n%s', request) fp = self.u2open(u2request) self.getcookies(fp, u2request) #result = Reply(200, fp.headers.dict, fp.read()) #print(str(fp)) result = Reply(200, fp.headers, fp.read()) log.debug('received:\n%s', result) except HTTPError as e: if e.code in (202,204): result = None else: raise TransportError(e.msg, e.code, e.fp) return result def addcookies(self, u2request): """ Add cookies in the cookiejar to the request. @param u2request: A urllib2 request. @rtype: u2request: urllib2.Requet. """ self.cookiejar.add_cookie_header(u2request) def getcookies(self, fp, u2request): """ Add cookies in the request to the cookiejar. @param u2request: A urllib2 request. @rtype: u2request: urllib2.Requet. """ self.cookiejar.extract_cookies(fp, u2request) def u2open(self, u2request): """ Open a connection. @param u2request: A urllib2 request. @type u2request: urllib2.Requet. @return: The opened file-like urllib2 object. @rtype: fp """ tm = self.options.timeout url = self.u2opener() if self.u2ver() < 2.6: socket.setdefaulttimeout(tm) return url.open(u2request) else: return url.open(u2request, timeout=tm) def u2opener(self): """ Create a urllib opener. @return: An opener. @rtype: I{OpenerDirector} """ if self.urlopener is None: return u2.build_opener(*self.u2handlers()) else: return self.urlopener def u2handlers(self): """ Get a collection of urllib handlers. @return: A list of handlers to be installed in the opener. @rtype: [Handler,...] """ handlers = [] handlers.append(u2.ProxyHandler(self.proxy)) return handlers def u2ver(self): """ Get the major/minor version of the urllib2 lib. @return: The urllib2 version. @rtype: float """ try: part = u2.__version__.split('.', 1) n = float('.'.join(part)) return n except Exception as e: log.exception(e) return 0 def __deepcopy__(self, memo={}): clone = self.__class__() p = Unskin(self.options) cp = Unskin(clone.options) cp.update(p) return clone
from urllib import request from urllib import parse # import requests from http.cookiejar import CookieJar # url="https://passport.csdn.net/login" url = "https://www.csdn.net/" # https://www.csdn.net/ # reqs=requests.get("https://www.csdn.net/") # request.urlretrieve("https://www.lagou.com/","lagou.html") # print(reqs.read() # print(reqs.text) cooks = CookieJar() handler = request.HTTPCookieProcessor(cooks) opener = request.build_opener(handler) headers = { "cookie": "uuid_tt_dd=10_18967429480-1546066692684-819876; ADHOC_MEMBERSHIP_CLIENT_ID1.0=cd907735-4a72-d35a-9972-3475ee92a278; smidV2=20190314180351d753612bc456e4d739aac872786cff3400791083439258400; UN=qq_42852199; Hm_ct_6bcd52f51e9b3dce32bec4a3997715ac=6525*1*10_18967429480-1546066692684-819876!1788*1*PC_VC!5744*1*qq_42852199; dc_session_id=10_1556192991599.310188; _ga=GA1.2.843540842.1556673732; __gads=ID=218b0ee3cd9d1419:T=1582511085:S=ALNI_MaH-fKfl8vTsRc4j__N8vmMC3N-rw; firstDie=1; announcement=%257B%2522isLogin%2522%253Atrue%252C%2522announcementUrl%2522%253A%2522https%253A%252F%252Fblog.csdn.net%252Fblogdevteam%252Farticle%252Fdetails%252F103603408%2522%252C%2522announcementCount%2522%253A0%252C%2522announcementExpire%2522%253A3600000%257D; SESSION=6bf07ad7-cc4f-4afe-9ae6-2374285446b4; TY_SESSION_ID=622931f8-1957-45d2-800c-6ee78b836d39; c-toolbar-writeguide=1; c_ref=https%3A//www.baidu.com/link%3Furl%3DK6Hwb4V7yz1uxBimTrM5drW1OyxqeaZfO_kHlvvUqRRI4kgo6jGvTIkF-FXv7bdMIo_jDru7lTNNpr2SjB3OR3h_k2O-1l3x5dlPgwsn2R7%26wd%3D%26eqid%3Db910503d0001398f000000065e7d66cf; Hm_lvt_6bcd52f51e9b3dce32bec4a3997715ac=1585274953,1585275230,1585276200,1585276630; UserName=qq_42852199; UserInfo=1a127c512a7544458fe4a25dec7dd1e9; UserToken=1a127c512a7544458fe4a25dec7dd1e9; UserNick=lufeifana; AU=2BA; BT=1585277101610; p_uid=U000000; Hm_lpvt_6bcd52f51e9b3dce32bec4a3997715ac=1585277109; dc_tos=q7tzn8", "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36", } data = { # "username":'******', # "password":"******" 'all': '18376098023', 'pwd': '18376098023aA..' } # req=request.Request(url=url,data=parse.urlencode(data).encode('utf-8'),headers=headers) req = request.Request(url=url, headers=headers) # opener.open(req)
class pyGoogleTrendsCsvDownloader(object): ''' Google Trends Downloader. Recommended usage: from pyGoogleTrendsCsvDownloader import pyGoogleTrendsCsvDownloader r = pyGoogleTrendsCsvDownloader(username, password) r.get_csv_data(cat='0-958', geo='US-ME-500') ''' def __init__(self, username, password, proxy=None): ''' Provide login and password to be used to connect to Google Trends All immutable system variables are also defined here ''' # The amount of time (in secs) that the script should wait before making a request. # This can be used to throttle the downloading speed to avoid hitting servers too hard. # It is further randomized. self.download_delay = 2 self.service = "trendspro" self.url_service = "http://www.google.com/trends/" self.url_download = 'https://www.google.com/trends/trendsReport?' self.login_params = {} # These headers are necessary, otherwise Google will flag the request at your account level self.headers = [('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.117 Safari/537.36'), ("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Accept-Language", "en-gb,en;q=0.8"), ("Accept-Encoding", "gzip,deflate,sdch"), ("referer", "https://www.google.com/trends/explore"), ("pragma", "no-cache"), ("cache-control", "no-cache"), ] self.url_login = '******'+self.service+'&passive=1209600&continue='+self.url_service+'&followup='+self.url_service self.url_authenticate = 'https://accounts.google.com/accounts/ServiceLoginAuth' self.proxy = proxy self._authenticate(username, password) def _authenticate(self, username, password): ''' Authenticate to Google: 1 - make a GET request to the Login webpage so we can get the login form 2 - make a POST request with email, password and login form input values ''' # Make sure we get CSV results in English ck1 = Cookie(version=0, name='I4SUserLocale', value='en_US', port=None, port_specified=False, domain='.google.com', domain_specified=False,domain_initial_dot=False, path='', path_specified=False, secure=False, expires=None, discard=False, comment=None, comment_url=None, rest=None) # This cookie is now mandatory # Not sure what the value represents but too many queries from the same value # lead to a Quota Exceeded error. 
# random_six_char = ''.join(random.choice('0123456789abcdef') for n in xrange(6)) ck2 = Cookie(version=0, name='PREF', value='0000', port=None, port_specified=False, domain='.google.com', domain_specified=False,domain_initial_dot=False, path='', path_specified=False, secure=False, expires=None, discard=False, comment=None, comment_url=None, rest=None) self.cj = CookieJar() self.cj.set_cookie(ck1) self.cj.set_cookie(ck2) if not self.proxy: self.opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(self.cj)) else: proxy = urllib.request.ProxyHandler({'http': self.proxy, 'https': self.proxy}) self.opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(self.cj), proxy) self.opener.addheaders = self.headers # Get all of the login form input values find_inputs = etree.XPath("//form[@id='gaia_loginform']//input") resp = self.opener.open(self.url_login) data = self.read_gzipped_response(resp).decode() try: xmlTree = etree.fromstring(data, parser=html.HTMLParser(recover=True, remove_comments=True)) for input in find_inputs(xmlTree): name = input.get('name') if name: name = name.encode('utf8') value = input.get('value', '').encode('utf8') self.login_params[name] = value except: raise AuthFailedException(("Exception while parsing: %s\n" % traceback.format_exc())) self.login_params["Email".encode('utf8')] = username.encode('utf8') self.login_params["Passwd".encode('utf8')] = password.encode('utf8') params = urllib.parse.urlencode(self.login_params) auth_resp = self.opener.open(self.url_authenticate, params.encode()) # Testing whether Authentication was a success # I noticed that a correct auth sets a few cookies if not self.is_authentication_successfull(auth_resp): raise AuthFailedException('Warning: Authentication failed for user %s' % username) def is_authentication_successfull(self, response): ''' Arbitrary way of us knowing whether the authentication succeeded or not: we look for a SSID cookie-set header value. I noticed that the 4 mandatory cookies were: - SID - SSID - HSID - PREF (but does not need to be set) ''' if response: for h in response.headers._headers: if 'SSID' in h[1]: return True return False def is_quota_exceeded(self, response): # TODO: double check that the check for the content-disposition # is correct if 'Content-Disposition' in [h[0] for h in response.headers._headers]: return False return True def read_gzipped_response(self, response): ''' Since we are adding gzip to our http request Google can answer with gzipped data that needs uncompressing before handling. This method returns the text content of a Http response. ''' if response.info().get('Content-Encoding') == 'gzip': f = gzip.decompress(response.read()) content = f else: content = response.read() return content def get_csv_data(self, **kwargs): ''' Download CSV reports ''' time.sleep(self.download_delay) params = { 'hl': 'en-us', 'export': 1 } params.update(kwargs) # Silly python with the urlencode method params = urllib.parse.urlencode(params).replace("+", "%20") response = self.opener.open(self.url_download + params) # Make sure quotas are not exceeded ;) if self.is_quota_exceeded(response): raise QuotaExceededException() return self.read_gzipped_response(response)
# The "http.cookiejar" module provides classes for handling HTTP cookies automatically.
# Class constructor: http.cookiejar.CookieJar(policy=None)

"""Examples"""

# Ex1. The most common usage of http.cookiejar.
import http.cookiejar, urllib.request
cj = http.cookiejar.CookieJar()
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))

# Ex2. Use a Netscape, Mozilla or Lynx cookies file when opening a URL.
import os, http.cookiejar, urllib.request
cj = http.cookiejar.MozillaCookieJar()
cj.load(os.path.join(os.path.expanduser("~"), ".netscape", "cookies.txt"))
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
r = opener.open("http://example.com/")

# Ex3. Usage of DefaultCookiePolicy.
import urllib.request
from http.cookiejar import CookieJar, DefaultCookiePolicy
policy = DefaultCookiePolicy(
    rfc2965=True, strict_ns_domain=DefaultCookiePolicy.DomainStrict,
    blocked_domains=["ads.net", ".ads.net"])
cj = CookieJar(policy)
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
r = opener.open("http://example.com/")
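# Ex4 (added sketch, not from the original examples): cookies collected by the jar
# can be inspected and persisted. LWPCookieJar is used because it supports save()
# and load(); the filename is an arbitrary placeholder.
import urllib.request
from http.cookiejar import LWPCookieJar
cj = LWPCookieJar("cookies.lwp")
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
r = opener.open("http://example.com/")
for cookie in cj:  # CookieJar objects are iterable
    print(cookie.name, cookie.value, cookie.domain)
cj.save(ignore_discard=True)  # keep session cookies too when writing to disk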
def module_run(self, domains):
    base_url = 'https://www.bing.com/search'
    for domain in domains:
        self.heading(domain, level=0)
        base_query = 'domain:' + domain
        pattern = f'"b_algo"><h2><a href="(?:\\w*://)*(\\S+?)\\.{domain}[^"]*"'
        subs = []
        # control variables
        new = True
        page = 0
        nr = 50
        cookiejar = CookieJar()
        cookiejar.set_cookie(
            self.make_cookie('SRCHHPGUSR', f"NEWWND=0&NRSLT={nr}&SRCHLANG=&AS=1", '.bing.com'))
        # execute search engine queries and scrape results, storing subdomains in a list
        # loop until no new subdomains are found
        while new:
            content = None
            query = ''
            # build query based on the results of previous queries
            for sub in subs:
                query += f" -domain:{sub}.{domain}"
            full_query = base_query + query
            url = f"{base_url}?first={page*nr}&q={urllib.parse.quote_plus(full_query)}"
            # bing errors out at > 2059 characters not including the protocol
            if len(url) > 2066:
                url = url[:2066]
            self.verbose(f"URL: {url}")
            # send query to search engine
            resp = self.request(url, cookiejar=cookiejar)
            if resp.status_code != 200:
                self.alert(
                    'Bing has encountered an error. Please submit an issue for debugging.')
                break
            content = resp.text
            sites = re.findall(pattern, content)
            # create a unique list
            sites = list(set(sites))
            new = False
            # add subdomain to list if it does not already exist
            for site in sites:
                if site not in subs:
                    subs.append(site)
                    new = True
                    host = f"{site}.{domain}"
                    self.insert_hosts(host)
            if not new:
                # exit if all subdomains have been found
                if '>Next</a>' not in content:
                    break
                else:
                    page += 1
                    self.verbose(
                        f"No New Subdomains Found on the Current Page. Jumping to Result {(page*nr)+1}.")
                    new = True
            # sleep script to avoid lock-out
            self.verbose('Sleeping to avoid lockout...')
            time.sleep(random.randint(5, 15))
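# The module above depends on a framework helper self.make_cookie(name, value, domain)
# that is not shown here. A plausible standalone sketch (an assumption, not the
# framework's actual implementation) builds an http.cookiejar.Cookie directly,
# mirroring the constructor pattern used elsewhere in this document:
from http.cookiejar import Cookie

def make_cookie(name, value, domain, path='/'):
    return Cookie(
        version=0, name=name, value=value,
        port=None, port_specified=False,
        domain=domain, domain_specified=True,
        domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=True,
        secure=False, expires=None, discard=False,
        comment=None, comment_url=None, rest={}, rfc2109=False)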
from base64 import b64encode
from http.cookiejar import CookieJar
from datetime import datetime, timezone, timedelta
import unittest
import unittest.mock
from functools import partial

from webtest import TestApp
from zstandard import ZstdCompressor  # type: ignore

from penguin_judge.api import app as _app, _kdf
from penguin_judge.models import (
    User, Environment, Contest, Problem, TestCase, Submission,
    JudgeResult, Token, JudgeStatus, configure, transaction)
from . import TEST_DB_URL

app = TestApp(_app, cookiejar=CookieJar())


class TestAPI(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        configure(**{'sqlalchemy.url': TEST_DB_URL}, drop_all=True)

    def setUp(self):
        from penguin_judge.main import _configure_app
        app.reset()
        _configure_app({})
        tables = (JudgeResult, Submission, TestCase, Problem, Contest,
                  Environment, Token, User)
        admin_token = bytes([i for i in range(32)])
        salt = b'penguin'
        passwd = _kdf('penguinpenguin', salt)
from http.cookiejar import CookieJar
from urllib.request import (
    ProxyHandler, HTTPCookieProcessor, build_opener, install_opener)


def install_proxy_opener():
    # Install a global opener that disables proxy autodetection and stores cookies
    # in a module-level CookieJar, so later urlopen() calls share the same session.
    global cookies
    cookies = CookieJar()
    proxy = ProxyHandler({})
    opener = build_opener(proxy, HTTPCookieProcessor(cookies))
    install_opener(opener)
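# A minimal usage sketch for the helper above: once install_opener() has run, plain
# urllib.request.urlopen() calls go through the installed opener and share the
# module-level CookieJar. The URL is a placeholder.
from urllib.request import urlopen

install_proxy_opener()
with urlopen('http://example.com/') as resp:
    body = resp.read()
print(len(cookies))  # number of cookies captured from the response, if any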
from urllib import request, parse
from http.cookiejar import CookieJar

header = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36'
}

# Build an opener whose CookieJar keeps the session cookies returned by the login.
cookiejar = CookieJar()
handler = request.HTTPCookieProcessor(cookiejar)
opener = request.build_opener(handler)

# Log in (credentials are placeholders).
post_url = 'https://www.pplt.net/login.php?'
post_data = parse.urlencode({
    'username': '******',
    'password': '******'
})
req = request.Request(post_url, data=post_data.encode('utf-8'))
opener.open(req)

# Reuse the same opener, and therefore the same cookies, for the logged-in page.
url = 'https://www.pplt.net/index.php'
rq = request.Request(url, headers=header)
resp = opener.open(rq)
print(resp.read().decode('gbk'))
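# After the login POST above, any session cookies issued by the site are stored in
# `cookiejar` and sent automatically with the second request. A quick way to verify
# that the login set something (cookie names vary by site, so this is just a sketch):
for c in cookiejar:
    print(c.name, c.value, c.domain, c.path)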