def cookiejar(self, domain):
    """Build a cookielib.CookieJar holding every stored cookie matching
    *domain* (a urlparse().netloc value, so it may carry a ':port' part).
    """
    jar = CookieJar()
    if not domain:
        return jar

    ip_match = IP_REGEX.match(domain)
    if ip_match:
        # Literal IPv4 ('127.0.0.1') or bracketed IPv6 ('[::1]') address.
        # cookielib needs a '.local' suffix on IPv6 literals.
        host = ip_match.group("ip")
        if host.startswith("[") and not host.endswith(".local"):
            host += ".local"
        candidates = [host]
    else:
        # Drop the optional ':port' suffix of the netloc.
        host = domain.split(":")[0]
        # Bare hostnames on the local network need a 'local' TLD
        # (required by cookielib).
        if "." not in host:
            host += ".local"
        key = host if host[0] == "." else "." + host
        labels = key.split(".")
        # Every dotted parent domain of the host, most specific first.
        parents = [".%s" % ".".join(labels[i:]) for i in range(1, len(labels) - 1)]
        candidates = [d for d in parents if d in self.cookiedict]

    if not candidates:
        return jar

    for dom in candidates:
        for path, cookies in self.cookiedict[dom].items():
            for cookie_name, attrs in cookies.items():
                ck = Cookie(
                    version=attrs["version"],
                    name=cookie_name,
                    value=attrs["value"],
                    port=None,
                    port_specified=False,
                    domain=dom,
                    domain_specified=True,
                    domain_initial_dot=False,
                    path=path,
                    path_specified=True,
                    secure=attrs["secure"],
                    expires=attrs["expires"],
                    discard=True,
                    comment=None,
                    comment_url=None,
                    rest={'HttpOnly': None},
                    rfc2109=False
                )
                # A stored port overrides the default "any port" cookie.
                if attrs["port"]:
                    ck.port = attrs["port"]
                    ck.port_specified = True
                jar.set_cookie(ck)
    return jar
def enviaPeticion(url, dominio, ruta, cookieDicc=None):
    """Send a GET request to *url* carrying the cookies in *cookieDicc*.

    Each (name, value) pair is turned into a Cookie via crearCookie()
    for domain *dominio* and path *ruta* and attached as a Cookie header.
    Returns verificacionAcceso()'s verdict on the response, or the string
    "Pagina fuera de servicio" when the server answers with an HTTP error.
    """
    # BUG FIX: the default used to be the mutable literal
    # {"TESTID": "set"}, shared across every call; bind it per call.
    if cookieDicc is None:
        cookieDicc = {"TESTID": "set"}
    try:
        # Cookie container for this request.
        jar = CookieJar()
        # Request object the cookie header will be written onto.
        peticion = urllib.request.Request(url=url)
        # crearCookie() builds one Cookie object per dict entry.
        for key, item in cookieDicc.items():
            jar.set_cookie(crearCookie(key, item, dominio, ruta))
        jar.add_cookie_header(peticion)
        # Issue the request.
        edmundoDantes = urllib.request.build_opener()
        abreteSesamo = edmundoDantes.open(peticion)
        RiquezaYVenganza = verificacionAcceso(abreteSesamo)
        if RiquezaYVenganza:
            print("Busca tu propio Arbol")
        else:
            print("!(Busca tu propio arbol)")
        return RiquezaYVenganza
    except urllib.error.HTTPError:
        print("Pagina fuera de servicio")
        return "Pagina fuera de servicio"
def dict_2_cookiejar(d):
    """Convert a list of cookie dicts into a CookieJar.

    Each entry must provide 'name', 'value' (URL-quoted), 'domain',
    'path', 'secure' and 'httponly' keys.
    """
    jar = CookieJar()
    for entry in d:
        jar.set_cookie(
            Cookie(
                version=0,
                name=entry["name"],
                value=urllib.parse.unquote(entry["value"]),
                port=None,
                port_specified=False,
                domain=entry["domain"],
                domain_specified=False,
                domain_initial_dot=False,
                path=entry["path"],
                path_specified=True,
                secure=entry["secure"],
                expires=None,
                discard=True,
                comment=None,
                comment_url=None,
                rest={"HttpOnly": entry["httponly"]},
                rfc2109=False,
            )
        )
    return jar
def _init_cookies(cookie_jar: CookieJar, firefox_cookies_path: str):
    """
    Initialize cookies from firefox

    :param cookie_jar: jar to fill in place (also returned)
    :param firefox_cookies_path: Firefox Cookies SQLite file
        For example, in linux, the cookies may at
        ~/.mozilla/firefox/*/cookies.sqlite.
        Falls back to the module-level __COOKIES_PATH when None.
    :return: the same *cookie_jar*, now holding the loaded cookies
    """
    if firefox_cookies_path is None:
        firefox_cookies_path = __COOKIES_PATH
    con = sqlite3.connect(firefox_cookies_path)
    try:
        cur = con.cursor()
        # noinspection SqlResolve
        cur.execute("SELECT host, path, isSecure, expiry, name, value FROM moz_cookies")
        for host, path, is_secure, expiry, name, value in cur.fetchall():
            # Positional Cookie(): version, name, value, port, port_specified,
            # domain, domain_specified, domain_initial_dot, path,
            # path_specified, secure, expires, discard, comment,
            # comment_url, rest.
            c = Cookie(
                0, name, value,
                None, False,
                host,
                host.startswith('.'),
                host.startswith('.'),
                path, False,
                is_secure, expiry,
                expiry == "",  # discard only when no expiry was recorded
                None, None, {}
            )
            cookie_jar.set_cookie(c)
    finally:
        # BUG FIX: the connection used to leak; always close it.
        con.close()
    return cookie_jar
def test_set_cookie_with_cookiejar() -> None:
    """
    Send a request including a cookie, using a `CookieJar` instance.
    """
    url = "http://example.org/echo_cookies"
    jar = CookieJar()
    jar.set_cookie(
        Cookie(
            version=0,
            name="example-name",
            value="example-value",
            port=None,
            port_specified=False,
            domain="",
            domain_specified=False,
            domain_initial_dot=False,
            path="/",
            path_specified=True,
            secure=False,
            expires=None,
            discard=True,
            comment=None,
            comment_url=None,
            rest={"HttpOnly": ""},
            rfc2109=False,
        )
    )
    client = httpx.Client(
        cookies=jar, transport=httpx.MockTransport(get_and_set_cookies)
    )
    response = client.get(url)
    assert response.status_code == 200
    assert response.json() == {"cookies": "example-name=example-value"}
def _getCookies(headers):
    """Parse a raw comma-separated cookie header string into a CookieJar.

    Each cookie is expected in the form "name=value; path=<path>; ...".
    Every cookie is pinned to the module-level swaDomain over HTTPS.
    """
    cj = CookieJar()
    for raw_cookie in headers.split(','):
        attrs = raw_cookie.split(';')
        # BUG FIX: split on the first '=' only, so values that themselves
        # contain '=' (e.g. base64 padding) are kept intact.  Also removed
        # the redundant chained `name =` assignments.
        cookie_name, cookie_value = attrs[0].strip().split('=', 1)
        cookie_path = attrs[1].strip().split('=', 1)[1]
        ck = Cookie(version=1,
                    name=cookie_name,
                    value=cookie_value,
                    port=443,
                    port_specified=False,
                    domain=swaDomain,
                    domain_specified=True,
                    domain_initial_dot=False,
                    path=cookie_path,
                    path_specified=True,
                    secure=True,
                    expires=None,
                    discard=True,
                    comment=None,
                    comment_url=None,
                    rest={'HttpOnly': None},
                    rfc2109=False)
        cj.set_cookie(ck)
    return cj
def get_cookie_jar():
    '''
    Creates a cookie jar for the mechanicalsoup browser
    '''
    from http.cookiejar import Cookie, CookieJar

    jar = CookieJar()
    jar.set_cookie(Cookie(version=0,
                          name=COOKIE_NAME,
                          value=COOKIE_VALUE,
                          port=None,
                          port_specified=False,
                          domain='scplanner.net',
                          domain_specified=True,
                          domain_initial_dot=False,
                          path='/',
                          path_specified=True,
                          secure=False,
                          expires=None,
                          discard=True,
                          comment=None,
                          comment_url=None,
                          rest=None,
                          rfc2109=True))
    return jar
async def test_setting_client_cookies_to_cookiejar():
    """
    Send a request including a cookie, using a `CookieJar` instance.
    """
    url = "http://example.org/echo_cookies"
    jar = CookieJar()
    jar.set_cookie(
        Cookie(
            version=0,
            name="example-name",
            value="example-value",
            port=None,
            port_specified=False,
            domain="",
            domain_specified=False,
            domain_initial_dot=False,
            path="/",
            path_specified=True,
            secure=False,
            expires=None,
            discard=True,
            comment=None,
            comment_url=None,
            rest={"HttpOnly": None},
            rfc2109=False,
        )
    )
    client = Client(dispatch=MockDispatch())
    client.cookies = jar
    response = await client.get(url)
    assert response.status_code == 200
    assert response.json() == {"cookies": "example-name=example-value"}
def test_get_auth_cookie_expires_with_jar(mocker, mock_load_cookie):
    jar = CookieJar()
    now = datetime.now()
    # Positional Cookie(...) order:
    # Cookie(version, name, value, port, port_specified, domain,
    #        domain_specified, domain_initial_dot, path, path_specified,
    #        secure, expires, discard, comment, comment_url, rest)
    cookie = Cookie(
        None, "Authorization", "bar", "80", "80", "www.foo.bar",
        None, None, "/", None, False, False, "TestCookie", None, None, None,
    )
    cookie.expires = (now + timedelta(minutes=12)).timestamp()
    jar.set_cookie(cookie)

    mockdate = mocker.patch("nhltv_lib.auth.datetime")
    mockdate.now.return_value = now
    mockdate.fromtimestamp.return_value = now + timedelta(minutes=12)

    mock_load_cookie.return_value = jar
    assert get_auth_cookie_expires_in_minutes() == 12
def test_get_auth_cookie_value_with_jar(mocker, mock_load_cookie):
    jar = CookieJar()
    # Positional Cookie(...) order:
    # Cookie(version, name, value, port, port_specified, domain,
    #        domain_specified, domain_initial_dot, path, path_specified,
    #        secure, expires, discard, comment, comment_url, rest)
    cookie = Cookie(
        None, "Authorization", "bar", "80", "80", "www.foo.bar",
        None, None, "/", None, False, False, "TestCookie", None, None, None,
    )
    cookie.expires = (datetime.now() + timedelta(hours=12)).timestamp()
    jar.set_cookie(cookie)
    mock_load_cookie.return_value = jar
    assert get_auth_cookie_value() == "bar"
def create_cookie(self):
    """Build a CookieJar carrying the login-authorization cookie."""
    jar = CookieJar()
    auth_cookie = Cookie(
        0, self.cookie_name, self.cookie_value, self.port, None,
        self.host, None, None, self.path, None,
        False, None, None, '', '', None, True,
    )
    jar.set_cookie(auth_cookie)
    return jar
def checkAnswer(request):
    """Django view: proxy a WolframAlpha practice-problem answer check.

    Expects GET params 'problem_id', 'difficulty' and 'query'; forwards
    them to www5a.wolframalpha.com with the session cookies captured by
    getMathQuestion, and returns Wolfram's JSON verdict.
    """
    request.encoding = 'utf-8'
    try:
        problem_id = request.GET['problem_id']
        difficulty = request.GET['difficulty']
        query = request.GET['query']
        s = "49"
    except Exception:
        # Missing or unreadable GET parameters.
        return JsonResponse({"information": "failure for check answer"})
    checkUrl = "http://www5a.wolframalpha.com/input/wpg/checkanswer.jsp?attempt=1&difficulty=" + difficulty + "&load=true&problemID=" + problem_id + "&query=" + query + "&s=" + s + "&type=InputField"
    print(checkUrl)
    try:
        headers = {
            'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            'Accept-Encoding': "gzip, deflate, sdch",
            'Accept-Language': "zh-CN,zh;q=0.8",
            'Connection': "keep-alive",
            'User-Agent': r'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
        }
        req = urllib.request.Request(checkUrl, headers=headers)
        # Two cookies copied from getMathQuestion's printed session info.
        c = Cookie(0, 'WR_SID', '120.236.174.172.1497365721706807', None,
                   False, '.wolframalpha.com', True, True, '/', True, False,
                   1812725721, False, None, None, None)
        c2 = Cookie(0, 'JSESSIONID', '9EF3562A8408891A3958EDE4C5644E9C', None,
                    False, 'www5a.wolframalpha.com', False, False, '/', True,
                    False, None, True, None, None, None)
        cj2 = CookieJar()
        cj2.set_cookie(c)
        cj2.set_cookie(c2)
        print(cj2)
        opener = urllib.request.build_opener(
            urllib.request.HTTPCookieProcessor(cj2))
        oriData = opener.open(req).read()
    except urllib.request.HTTPError as e:
        print(e.code)
        return JsonResponse({"information": "failure for http"})
    except urllib.request.URLError as e:
        print(str(e))
        # BUG FIX: this literal was broken across a line mid-string in the
        # source; restored to a single, valid string.
        return JsonResponse({"information": "failure for url"})
    return JsonResponse(json.loads(oriData))
class BuiltinBrowser(BaseBrowser):
    """Browser backed by urllib with an in-memory cookie jar."""

    def __init__(self, base_url=None):
        base_url = get_default_workflowy_url(base_url)
        super().__init__(base_url)
        self.cookie_jar = CookieJar()
        self.opener = build_opener(HTTPCookieProcessor(self.cookie_jar))

    def open(self, url, *, _raw=False, _query=None, **kwargs):
        """POST *kwargs* (form-encoded) to base_url+url.

        Returns (response, content); the content is JSON-decoded unless
        _raw is true.  _query, when given, becomes the URL query string.
        """
        full_url = urljoin(self.base_url, url)
        if _query is not None:
            full_url += "?" + urlencode(_query)

        payload = urlencode(kwargs).encode()
        req = Request(
            full_url,
            payload,
            {"Content-Type": "application/x-www-form-urlencoded"},
        )
        res = self.opener.open(req)

        with closing(res) as fp:
            content = fp.read().decode()

        if not _raw:
            # TODO: must not raise 404 error
            content = json.loads(content)

        return res, content

    def set_cookie(self, name, value):
        """Store a long-lived (non-discard) cookie scoped to the base URL."""
        url = urlparse(self.base_url)
        self.cookie_jar.set_cookie(
            Cookie(
                version=0,
                name=name,
                value=value,
                port=None,
                port_specified=False,
                domain=url.netloc,
                domain_specified=False,
                domain_initial_dot=False,
                path=url.path,
                path_specified=True,
                secure=False,
                expires=sys.maxsize,
                discard=False,
                comment=None,
                comment_url=None,
                rest={},
                rfc2109=False,
            )
        )
class BuiltinBrowser(BaseBrowser):
    """urllib-based browser keeping session cookies in a CookieJar."""

    def __init__(self, base_url=None):
        base_url = get_default_workflowy_url(base_url)
        super().__init__(base_url)
        self.cookie_jar = CookieJar()
        self.opener = build_opener(HTTPCookieProcessor(self.cookie_jar))

    def open(self, url, *, _raw=False, _query=None, **kwargs):
        """Issue a form-encoded POST to base_url+url and return (res, content).

        _query is appended as the URL query string when given; the body is
        JSON-decoded unless _raw is true.
        """
        target = urljoin(self.base_url, url)
        if _query is not None:
            target = target + "?" + urlencode(_query)

        body = urlencode(kwargs).encode()
        headers = {"Content-Type": "application/x-www-form-urlencoded"}
        response = self.opener.open(Request(target, body, headers))

        with closing(response) as stream:
            text = stream.read().decode()

        if _raw:
            payload = text
        else:
            # TODO: must not raise 404 error
            payload = json.loads(text)

        return response, payload

    def set_cookie(self, name, value):
        """Add a persistent cookie bound to the base URL's host and path."""
        parts = urlparse(self.base_url)
        cookie = Cookie(
            version=0,
            name=name,
            value=value,
            port=None,
            port_specified=False,
            domain=parts.netloc,
            domain_specified=False,
            domain_initial_dot=False,
            path=parts.path,
            path_specified=True,
            secure=False,
            expires=sys.maxsize,
            discard=False,
            comment=None,
            comment_url=None,
            rest={},
            rfc2109=False,
        )
        self.cookie_jar.set_cookie(cookie)
def load_cookie_data(self, filename,
                     ignore_discard=False, ignore_expires=False):
    r"""Load cookies from file containing actual cookie data.

    Old cookies are kept unless overwritten by newly loaded ones.

    You should not call this method if the delayload attribute is set.

    I think each of these files contain all cookies for one user, domain,
    and path.

    filename: file containing cookies -- usually found in a file like
     C:\WINNT\Profiles\joe\Cookies\joe@blah[1].txt
    """
    now = int(time.time())

    cookie_data = self._load_cookies_from_file(filename)

    for cookie in cookie_data:
        flags = cookie["FLAGS"]
        # Bit 0x2000 of the MSIE FLAGS field marks a secure-only cookie.
        secure = ((flags & 0x2000) != 0)
        # Expiry is stored as a win32 FILETIME split into high/low dwords.
        filetime = (cookie["HIXP"] << 32) + cookie["LOXP"]
        expires = epoch_time_offset_from_win32_filetime(filetime)
        # Already-expired cookies become session (discard) cookies.
        if expires < now:
            discard = True
        else:
            discard = False
        domain = cookie["DOMAIN"]
        initial_dot = domain.startswith(".")
        if initial_dot:
            domain_specified = True
        else:
            # MSIE 5 does not record whether the domain cookie-attribute
            # was specified.
            # Assuming it wasn't is conservative, because with strict
            # domain matching this will match less frequently; with regular
            # Netscape tail-matching, this will match at exactly the same
            # times that domain_specified = True would.  It also means we
            # don't have to prepend a dot to achieve consistency with our
            # own & Mozilla's domain-munging scheme.
            domain_specified = False

        # assume path_specified is false
        # XXX is there other stuff in here? -- e.g. comment, commentURL?
        c = Cookie(0,
                   cookie["KEY"], cookie["VALUE"],
                   None, False,
                   domain, domain_specified, initial_dot,
                   cookie["PATH"], False,
                   secure,
                   expires,
                   discard,
                   None,
                   None,
                   {"flags": flags})
        if not ignore_discard and c.discard:
            continue
        if not ignore_expires and c.is_expired(now):
            continue
        # Call the base class directly to bypass any delayload override.
        CookieJar.set_cookie(self, c)
class BasicParser:
    """urllib-based scraper base: a cookie- and header-aware opener with
    retry-on-timeout requests and transparent decompression."""

    def __init__(self):
        self.cookiejar = None
        self.cookie_str = None
        self.opener = None
        # BUG FIX: previously `self.headers = HEADERS` aliased the module
        # level dict, so loadCookie()'s in-place update leaked the Cookie
        # header into every other instance.  Copy it instead.
        self.headers = dict(HEADERS)
        self.initOpener()

    def initOpener(self):
        """(Re)build the opener so it reflects current cookies/headers."""
        self.cookiejar = CookieJar()
        self.opener = build_opener(HTTPCookieProcessor(self.cookiejar))
        self.opener.addheaders = list(self.headers.items())

    def setHeaders(self, headers_dict):
        """Replace the default headers and rebuild the opener."""
        self.headers = dict(headers_dict)  # copy: don't alias caller's dict
        self.initOpener()

    def setCookie(self, name, value, domain, path):
        """Insert a single cookie into the jar."""
        self.cookiejar.set_cookie(
            Cookie(0, name, value, None, False, domain, True, False, path,
                   True, False, None, False, None, None, None))

    def loadCookie(self, cookie_str):
        """Attach a raw Cookie header string to every request."""
        self.cookie_str = cookie_str
        self.headers['Cookie'] = cookie_str
        self.initOpener()

    def requestRaw(self, *args, **kwargs):
        """Open a Request, retrying up to 10 times on timeout/URL/SSL errors."""
        counter = 0
        req = Request(*args, **kwargs)
        while True:
            try:
                time.sleep(0.05)  # small pacing delay before each attempt
                res = self.opener.open(req)
            except (socket.timeout, URLError, SSLError):
                counter += 1
                time.sleep(0.3)
                if counter >= 10:
                    raise socket.timeout
                continue
            else:
                break
        return res

    def request(self, *args, **kwargs):
        """Fetch a URL and return the decompressed, decoded body text."""
        res = self.requestRaw(*args, **kwargs)
        raw = res.read()
        text = raw_decompress(raw, res.info())
        res.close()
        return text

    def parse(self, *args):
        """Hook for subclasses."""
        pass
def module_run(self, domains):
    # Enumerate subdomains for each domain via Netcraft's searchdns
    # service, following result pages until exhausted.
    url = 'http://searchdns.netcraft.com/'
    pattern = r'<a class="results-table__host" href="http://(.*?)/"'
    # answer challenge cookie
    cookiejar = CookieJar()
    payload = {'restriction': 'site+ends+with', 'host': 'test.com'}
    resp = self.request('GET', url, params=payload, cookies=cookiejar)
    cookiejar = resp.cookies
    for cookie in cookiejar:
        if cookie.name == 'netcraft_js_verification_challenge':
            challenge = cookie.value
            # Netcraft's anti-bot check: the expected response cookie is
            # the SHA-1 hex digest of the URL-unquoted challenge value.
            response = hashlib.sha1(
                unquote_plus(challenge).encode('utf-8')).hexdigest()
            cookiejar.set_cookie(
                self.make_cookie('netcraft_js_verification_response',
                                 f"{response}", '.netcraft.com'))
            break
    for domain in domains:
        self.heading(domain, level=0)
        payload['host'] = domain
        subs = []
        # execute search engine queries and scrape results storing subdomains in a list
        # loop until no Next Page is available
        while True:
            self.verbose(f"URL: {url}?{urlencode(payload)}")
            resp = self.request('GET', url, params=payload, cookies=cookiejar)
            content = resp.text
            sites = re.findall(pattern, content)
            # create a unique list
            sites = list(set(sites))
            # add subdomain to list if not already exists
            for site in sites:
                if site not in subs:
                    subs.append(site)
                    self.insert_hosts(site)
            # verifies if there's more pages to look while grabbing the correct
            # values for our payload...
            link = re.findall(r'(\blast\=\b|\bfrom\=\b)(.*?)&', content)
            if not link:
                break
            else:
                payload['last'] = link[0][1]
                payload['from'] = link[1][1]
                self.verbose('Next page available! Requesting again...')
                # sleep script to avoid lock-out
                self.verbose('Sleeping to Avoid Lock-out...')
                time.sleep(random.randint(5, 15))
        if not subs:
            self.output('No results found.')
def _get_cookies(self):
    '''Return the user's browser cookies as a CookieJar.

    If the pickled test fixture exists it is loaded from disk; otherwise
    the cookies are read live from the Chrome cookie database.
    '''
    if path.isfile(self.path_to_cookies_temp):
        # BUG FIX: this branch used to open the hard-coded
        # '..\\config\\cookies.pickle' instead of the path it just checked.
        # NOTE(review): pickle.load on an attacker-writable file can run
        # arbitrary code -- this fixture must stay trusted.
        with open(self.path_to_cookies_temp, 'rb') as f:
            load_list_cookies = pickle.load(f)
        cj = CookieJar()
        for cookie in load_list_cookies:
            cj.set_cookie(cookie)
    else:
        # From Chrome cookie database
        cj = chrome()
    return cj
def test_app_secure_cookies():
    """Round-trip secure cookies: set on the response, read via the request."""
    cookies_view.set_secure_cookie('test', '内容测试')
    cookies_view.set_secure_cookie('test2', {'value': '内容测试'})
    cookies_view.finish(RETCODE.SUCCESS)

    jar = CookieJar()
    for morsel in cookies_view.response.cookies.values():
        jar.set_cookie(morsel_to_cookie(morsel))
    cookies_view.request.cookies = dict_from_cookiejar(jar)

    assert cookies_view.get_secure_cookie('test') == '内容测试'
    assert cookies_view.get_secure_cookie('test2') == {'value': '内容测试'}
def cookiejar_from_dict(cookie_dict, cookiejar=None):
    """Returns a CookieJar from a key/value dictionary.

    :param cookie_dict: Dict of key/values to insert into CookieJar.
    """
    if isinstance(cookie_dict, CookieJar):
        # Already a jar: hand it straight back.
        return cookie_dict

    jar = CookieJar() if cookiejar is None else cookiejar
    for name in (cookie_dict or {}):
        jar.set_cookie(create_cookie(name, cookie_dict[name]))
    return jar
def __read_cj(self):
    """Return the CookieJar cached in cookiejar.dat, or None.

    The cache is rejected when the file is missing or older than 24 hours.
    Side effects: restores self.__uid and self.__nickname from the cache.
    """
    MAX_EPS = 86400  # 24 hours
    if not os.path.exists("cookiejar.dat"):
        return None
    modtime = os.stat("cookiejar.dat").st_mtime
    if time.time() - modtime > MAX_EPS:
        # Cache is stale.
        return None
    # BUG FIX: pickle.load(open(...)) leaked the file handle; use `with`.
    with open("cookiejar.dat", "rb") as fp:
        dd = pickle.load(fp)
    cj = CookieJar()
    for c in dd["cookies"]:
        cj.set_cookie(c)
    self.__uid = dd["uid"]
    self.__nickname = dd["nick"]
    return cj
def cookiejar_from_dict(*cookie_dicts):
    """Returns a CookieJar from a key/value dictionary.

    :param cookie_dict: Dict of key/values to insert into CookieJar.
    """
    sources = tuple(d for d in cookie_dicts if d)

    # Fast path: a single CookieJar argument is returned untouched.
    if len(sources) == 1 and isinstance(sources[0], CookieJar):
        return sources[0]

    jar = CookieJar()
    for source in sources:
        if isinstance(source, CookieJar):
            for cookie in source:
                jar.set_cookie(cookie)
        else:
            for name in source:
                jar.set_cookie(create_cookie(name, source[name]))
    return jar
def __init__(self):
    """Open a session to the tracker and attempt phpBB authorization.

    Builds a cookie-aware opener (forcing SSL via the 'ssl' cookie),
    optionally routes through the configured proxy, then POSTs the login
    form with the site's one-time 'code' value.
    """
    # establish connection
    #
    # make cookie
    cj = CookieJar()
    # if we want to use https we must add the ssl=enable_ssl cookie
    c = Cookie(0, 'ssl', "enable_ssl", None, False, '.nnm-club.me', True,
               False, '/', True, False, None, 'ParserCookie', None, None, None)
    cj.set_cookie(c)
    self.session = build_opener(HTTPCookieProcessor(cj))
    # avoid endless waiting
    self.blocked = False
    # add proxy handler if needed
    if self.config['proxy'] and any(self.config['proxies'].keys()):
        self.session.add_handler(ProxyHandler(self.config['proxies']))
    # change user-agent
    self.session.addheaders.pop()
    self.session.addheaders.append(('User-Agent', self.config['ua']))

    response = self._catch_error_request(self.url + 'login.php')
    if not self.blocked:
        # The login form embeds a one-time 'code' value (anti-CSRF);
        # pages are served in cp1251.
        code = re.search(r'code"\svalue="(.+?)"',
                         response.read().decode('cp1251'))[1]
        form_data = {"username": self.config['username'],
                     "password": self.config['password'],
                     "autologin": "******",
                     "code": code,
                     "login": "******"}
        # so we first encode keys to cp1251 then do default decode whole string
        data_encoded = urlencode(
            {k: v.encode('cp1251') for k, v in form_data.items()}).encode()

        self._catch_error_request(self.url + 'login.php', data_encoded)
        # A phpBB session cookie appears only after a successful login.
        if 'phpbb2mysql_4_sid' not in [cookie.name for cookie in cj]:
            logging.warning(
                "we not authorized, please check your credentials")
        else:
            logging.info('We successfully authorized')
def set_cookie(self, cookie):
    """Add *cookie* to the jar, persisting it to the moz_cookies table.

    Session cookies (discard=True) are kept in memory only via the base
    CookieJar; everything else is written through to SQLite.
    """
    if cookie.discard:
        # Session cookie: in-memory only.
        CookieJar.set_cookie(self, cookie)
        return

    def set_cookie(cur):
        # XXX
        # is this RFC 2965-correct?
        # could this do an UPDATE instead?
        row = self._row_from_cookie(cookie, cur)
        name, unused, domain, path = row[1:5]
        # Delete-then-insert replaces any cookie with the same
        # (host, path, name) triple.
        cur.execute("""\
DELETE FROM moz_cookies WHERE host = ? AND path = ? AND name = ?""",
                    (domain, path, name))
        cur.execute("""\
INSERT INTO moz_cookies VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
""", row)
    # Run the whole replace inside one transaction.
    self._transaction(set_cookie)
def to_cookiejar(cookies):
    """Build CookieJar object from dict, list or tuple

    Attributes:
    - `cookies`: (dict, list or tuple)

    Returns:
    - `cookiejar`: `CookieJar` instance
    """
    if isinstance(cookies, CookieJar):
        return cookies

    if isinstance(cookies, (tuple, list)):
        pairs = cookies
    elif isinstance(cookies, dict):
        pairs = list(cookies.items())
    else:
        raise ValueError("Unsupported argument")

    jar = CookieJar()
    for name, value in pairs:
        jar.set_cookie(Cookie(
            version=0,
            name=name,
            value=value,
            port=None,
            port_specified=False,
            domain='',
            domain_specified=False,
            domain_initial_dot=False,
            path='/',
            path_specified=True,
            secure=False,
            expires=None,
            discard=True,
            comment=None,
            comment_url=None,
            rest={'HttpOnly': None},
            rfc2109=False))
    return jar
def testCookieAdapters(self):
    """Round-trip a cookie: Response Set-Cookie -> jar -> Request header."""
    jar = CookieJar(policy=None)  # DefaultCookiePolicy())

    # set a cookie
    res = Response()
    tstval = str(uuid.uuid4())
    res.set_cookie("a-cookie", tstval, domain="example.com")
    for cookie in jar.make_cookies(filters.ResponseCookieAdapter(res),
                                   Request.blank("http://example.com")):
        jar.set_cookie(cookie)

    self.assert_(len(jar), ("where's my cookies?"))
    self.assert_("a-cookie" in [c.name for c in jar],
                 "seriously, where's my cookie")

    # now put the header on the request please
    request = Request.blank("http://example.com")
    self.assert_(".example.com" in jar._cookies.keys(), jar._cookies.keys())
    jar.add_cookie_header(filters.RequestCookieAdapter(request))
    self.assert_("Cookie" in request.headers, (str(request), "Y NO COOKIES?"))
def load_ff_sessions(session_filename):
    """Parse Chrome's cookie SQLite store into a CookieJar (Windows only).

    NOTE(review): despite the "ff" name, this reads *Chrome's* cookie DB
    (as the original comment said) and *session_filename* is unused --
    confirm against callers before renaming.
    """
    cookie_file_path = os.path.join(
        os.environ['LOCALAPPDATA'],
        r'Google\Chrome\User Data\Default\Cookies')
    print(cookie_file_path)
    if not os.path.exists(cookie_file_path):
        raise Exception('Cookies file not exist!')
    conn = sqlite3.connect(cookie_file_path)
    sql = 'select host_key,name,encrypted_value,path from cookies where host_key like "%{}%"'.format(
        'example.webscraping.com')
    cj = CookieJar()
    try:
        for row in conn.execute(sql):
            print(row[0])
            print(row[1])
            print(row[2])
            print(row[3])
            try:
                # Chrome encrypts cookie values with DPAPI; decrypt per row.
                ret = win32crypt.CryptUnprotectData(row[2], None, None, None, 0)
            except Exception:
                # BUG FIX: was a bare `except:` (also swallowed SystemExit
                # and KeyboardInterrupt).
                print('Fail to decrypt chrome cookies')
                sys.exit(-1)
            c = Cookie(
                version=0,
                name=row[1],
                value=ret[1].decode(),  # decrypted bytes -> str (must decode)
                port=None,
                port_specified=None,
                domain=row[0],
                domain_specified=None,
                domain_initial_dot=None,
                path=row[3],
                path_specified=None,
                secure=None,
                expires=None,
                discard=None,
                comment=None,
                comment_url=None,
                rest=None,
                rfc2109=False)
            cj.set_cookie(c)
    finally:
        # BUG FIX: the connection used to leak; always close it.
        conn.close()
    return cj
def param_cookies():
    """Demo: send cookies with requests, as a plain dict and as a CookieJar."""
    # Dict form: requests wraps it in a jar internally.
    requests.request(method='POST',
                     url='http://127.0.0.1:8000/test/',
                     data={'k1': 'v1', 'k2': 'v2'},
                     cookies={'cook1': 'value1'},
                     )

    # Explicit CookieJar form (what the dict form is built on).
    from http.cookiejar import CookieJar
    from http.cookiejar import Cookie
    obj = CookieJar()
    obj.set_cookie(Cookie(version=0, name='c1', value='v1',
                          port=None, port_specified=False,
                          domain='', domain_specified=False,
                          domain_initial_dot=False,
                          path='/', path_specified=False,
                          secure=False, expires=None, discard=True,
                          comment=None, comment_url=None,
                          rest={'HttpOnly': None}, rfc2109=False))
    requests.request(method='POST',
                     url='http://127.0.0.1:8000/test/',
                     data={'k1': 'v1', 'k2': 'v2'},
                     cookies=obj)
def set_cookie(self, cookie):
    """Store *cookie*: in memory when it is a session cookie, otherwise
    write it through to the moz_cookies SQLite table."""
    if cookie.discard:
        # Session cookie: base-class in-memory handling only.
        CookieJar.set_cookie(self, cookie)
        return

    def set_cookie(cur):
        # XXX
        # is this RFC 2965-correct?
        # could this do an UPDATE instead?
        row = self._row_from_cookie(cookie, cur)
        name, unused, domain, path = row[1:5]
        # Replace any existing cookie with the same (host, path, name).
        cur.execute(
            """\
DELETE FROM moz_cookies WHERE host = ? AND path = ? AND name = ?""",
            (domain, path, name))
        cur.execute(
            """\
INSERT INTO moz_cookies VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
""", row)
    # Delete + insert run atomically inside one transaction.
    self._transaction(set_cookie)
def load_gmail_cookies(account_name, cookies_key):
    """Load GMail cookies for account

    Result is a CookieJar object, or an empty list when the cookie file
    is missing, fails to decrypt, or contains unexpected data.
    """
    file_name = get_gmail_cookie_file_name(account_name)
    try:
        with open(file_name, "rb") as f:
            ciphertext = f.read()
    except IOError:
        return []
    try:
        plaintext = Fernet(cookies_key.encode("ASCII")).decrypt(ciphertext)
        cookies = pickle.loads(plaintext)
    except InvalidToken:
        return []
    jar = CookieJar()
    for cookie in cookies:
        # Reject anything that is not an actual Cookie object.
        if not isinstance(cookie, http.cookiejar.Cookie):
            return []
        jar.set_cookie(cookie)
    return jar
def get_summary_by_team(target):
    """Fetch *target* re-using Firefox's stored cookies and parse the page.

    Reads matching cookies out of the profile's SQLite store, attaches
    them to a urllib opener, downloads the page and hands the HTML to
    html_to_pd() (table index 2).
    """
    cj = CookieJar()
    cookie_db = get_cookie_db_path(str(FIREFOX_DIR))
    conn = db.connect(cookie_db)
    cursor = conn.cursor()
    # CONTENTS (column list) and host (filter) are module-level values.
    sql = "SELECT {c} FROM moz_cookies WHERE host LIKE '%{h}%'".format(
        c=CONTENTS, h=host)
    cursor.execute(sql)
    for item in cursor.fetchall():
        # Positional Cookie(): version, name, value, port, port_specified,
        # domain, domain_specified, domain_initial_dot, path,
        # path_specified, secure, expires, discard, comment, comment_url,
        # rest.  Presumably item[4]/item[5] are name/value -- depends on
        # CONTENTS' column order; verify against its definition.
        c = Cookie(0, item[4], item[5], None, False, item[0],
                   item[0].startswith('.'), item[0].startswith('.'),
                   item[1], False, item[2], item[3], item[3] == "",
                   None, None, {})
        #print c
        cj.set_cookie(c)
    opener = urllib.request.build_opener(
        urllib.request.HTTPCookieProcessor(cj))
    response = opener.open(target)
    xhtml = response.read().decode("utf-8")
    #req = urllib.request.Request(url=target)
    #f = urllib.request.urlopen(req)
    #xhtml = f.read().decode("utf-8")
    return html_to_pd(xhtml, 2)
def jar(self) -> CookieJar:
    """Snapshot the driver's current cookies into a fresh CookieJar."""
    result = CookieJar()
    for raw in self._driver.get_cookies():
        result.set_cookie(Cookies.create(raw))
    return result
def save(self, jar: CookieJar):
    """Copy every cookie currently held by the driver into *jar*."""
    for raw in self._driver.get_cookies():
        jar.set_cookie(Cookies.create(raw))
def set_cookie(self, cookie):
    """Add *cookie* to the jar, honouring delayed loading."""
    if self.delayload:
        # Presumably loads this domain's cookies from the backing store
        # first (see _delayload_domain) so the new cookie merges with
        # persisted state -- confirm against that helper.
        self._delayload_domain(cookie.domain)
    CookieJar.set_cookie(self, cookie)
def get(self, options, url):
    """Resolve and download a Kanal 5 Play video given its page *url*.

    Logs in when credentials are supplied, picks HLS or RTMP depending
    on options.hls, refuses DRM-protected streams, and fetches subtitles
    when requested.
    """
    cj = CookieJar()
    # The numeric video id is embedded in the page URL.
    match = re.search(".*video/([0-9]+)", url)
    if not match:
        log.error("Can't find video file")
        sys.exit(2)
    video_id = match.group(1)
    if options.username and options.password:
        #bogus
        cc = Cookie(None, 'asdf', None, '80', '80', 'www.kanal5play.se',
                    None, None, '/', None, False, False, 'TestCookie',
                    None, None, None)
        cj.set_cookie(cc)
        #get session cookie
        data = get_http_data("http://www.kanal5play.se/", cookiejar=cj)
        authurl = "https://kanal5swe.appspot.com/api/user/login?callback=jQuery171029989&email=%s&password=%s&_=136250" % (options.username, options.password)
        data = get_http_data(authurl)
        # The login endpoint answers JSONP; strip the callback wrapper.
        match = re.search("({.*})\);", data)
        jsondata = json.loads(match.group(1))
        if jsondata["success"] == False:
            log.error(jsondata["message"])
            sys.exit(2)
        authToken = jsondata["userData"]["auth"]
        cc = Cookie(version=0, name='authToken',
                    value=authToken,
                    port=None, port_specified=False,
                    domain='www.kanal5play.se',
                    domain_specified=True,
                    domain_initial_dot=True, path='/',
                    path_specified=True, secure=False,
                    expires=None, discard=True, comment=None,
                    comment_url=None, rest={'HttpOnly': None})
        cj.set_cookie(cc)
    # FLASH -> RTMP streams; IPHONE -> HLS streams.
    format = "FLASH"
    if options.hls:
        format = "IPHONE"
    url = "http://www.kanal5play.se/api/getVideo?format=%s&videoId=%s" % (format, video_id)
    data = json.loads(get_http_data(url, cookiejar=cj))
    options.live = data["isLive"]
    if data["hasSubtitle"]:
        subtitle = "http://www.kanal5play.se/api/subtitles/%s" % video_id
    if options.hls:
        url = data["streams"][0]["source"]
        baseurl = url[0:url.rfind("/")]
        if data["streams"][0]["drmProtected"]:
            log.error("We cant download drm files for this site.")
            sys.exit(2)
        download_hls(options, url, baseurl)
    else:
        steambaseurl = data["streamBaseUrl"]
        # Index available streams by bitrate for quality selection.
        streams = {}
        for i in data["streams"]:
            stream = {}
            stream["source"] = i["source"]
            streams[int(i["bitrate"])] = stream
        test = select_quality(options, streams)
        filename = test["source"]
        match = re.search("^(.*):", filename)
        # Extra rtmpdump flags: player SWF (-W) and playpath (-y).
        options.other = "-W %s -y %s " % ("http://www.kanal5play.se/flash/K5StandardPlayer.swf", filename)
        download_rtmp(options, steambaseurl)
    if options.subtitle:
        if options.output != "-":
            data = get_http_data(subtitle, cookiejar=cj)
            subtitle_json(options, data)
class Cookies(MutableMapping):
    """
    HTTP Cookies, as a mutable mapping.
    """

    def __init__(self, cookies: CookieTypes = None) -> None:
        # Accepts: None, a plain dict (converted), another Cookies
        # instance (cookies copied into a fresh jar), or a raw CookieJar
        # (adopted as-is, NOT copied).
        if cookies is None or isinstance(cookies, dict):
            self.jar = CookieJar()
            if isinstance(cookies, dict):
                for key, value in cookies.items():
                    self.set(key, value)
        elif isinstance(cookies, Cookies):
            self.jar = CookieJar()
            for cookie in cookies.jar:
                self.jar.set_cookie(cookie)
        else:
            self.jar = cookies

    def extract_cookies(self, response: Response) -> None:
        """
        Loads any cookies based on the response `Set-Cookie` headers.
        """
        # CookieJar only understands urllib-shaped request/response
        # objects, hence the compat wrappers below.
        urlib_response = self._CookieCompatResponse(response)
        urllib_request = self._CookieCompatRequest(response.request)

        self.jar.extract_cookies(urlib_response, urllib_request)  # type: ignore

    def set_cookie_header(self, request: Request) -> None:
        """
        Sets an appropriate 'Cookie:' HTTP header on the `Request`.
        """
        urllib_request = self._CookieCompatRequest(request)
        self.jar.add_cookie_header(urllib_request)

    def set(self, name: str, value: str, domain: str = "", path: str = "/") -> None:
        """
        Set a cookie value by name. May optionally include domain and path.
        """
        kwargs = {
            "version": 0,
            "name": name,
            "value": value,
            "port": None,
            "port_specified": False,
            "domain": domain,
            "domain_specified": bool(domain),
            "domain_initial_dot": domain.startswith("."),
            "path": path,
            "path_specified": bool(path),
            "secure": False,
            "expires": None,
            "discard": True,
            "comment": None,
            "comment_url": None,
            "rest": {"HttpOnly": None},
            "rfc2109": False,
        }
        cookie = Cookie(**kwargs)  # type: ignore
        self.jar.set_cookie(cookie)

    def get(  # type: ignore
        self, name: str, default: str = None, domain: str = None, path: str = None
    ) -> typing.Optional[str]:
        """
        Get a cookie by name. May optionally include domain and path
        in order to specify exactly which cookie to retrieve.

        Raises `CookieConflict` when more than one cookie matches and no
        domain/path disambiguation was given.
        """
        value = None
        for cookie in self.jar:
            if cookie.name == name:
                if domain is None or cookie.domain == domain:  # type: ignore
                    if path is None or cookie.path == path:
                        if value is not None:
                            message = f"Multiple cookies exist with name={name}"
                            raise CookieConflict(message)
                        value = cookie.value

        if value is None:
            return default
        return value

    def delete(self, name: str, domain: str = None, path: str = None) -> None:
        """
        Delete a cookie by name. May optionally include domain and path
        in order to specify exactly which cookie to delete.
        """
        if domain is not None and path is not None:
            return self.jar.clear(domain, path, name)

        # Collect matches first: CookieJar.clear() mutates the jar we are
        # iterating over.
        remove = []
        for cookie in self.jar:
            if cookie.name == name:
                if domain is None or cookie.domain == domain:  # type: ignore
                    if path is None or cookie.path == path:
                        remove.append(cookie)

        for cookie in remove:
            self.jar.clear(cookie.domain, cookie.path, cookie.name)  # type: ignore

    def clear(self, domain: str = None, path: str = None) -> None:
        """
        Delete all cookies. Optionally include a domain and path in
        order to only delete a subset of all the cookies.
        """
        args = []
        if domain is not None:
            args.append(domain)
        if path is not None:
            # CookieJar.clear() requires domain whenever path is given.
            assert domain is not None
            args.append(path)
        self.jar.clear(*args)

    def update(self, cookies: CookieTypes = None) -> None:  # type: ignore
        # Normalise through the constructor, then merge into our jar.
        cookies = Cookies(cookies)
        for cookie in cookies.jar:
            self.jar.set_cookie(cookie)

    def __setitem__(self, name: str, value: str) -> None:
        return self.set(name, value)

    def __getitem__(self, name: str) -> str:
        value = self.get(name)
        if value is None:
            raise KeyError(name)
        return value

    def __delitem__(self, name: str) -> None:
        return self.delete(name)

    def __len__(self) -> int:
        return len(self.jar)

    def __iter__(self) -> typing.Iterator[str]:
        return (cookie.name for cookie in self.jar)

    def __bool__(self) -> bool:
        # Truthy when the jar holds at least one cookie.
        for _ in self.jar:
            return True
        return False

    class _CookieCompatRequest(urllib.request.Request):
        """
        Wraps a `Request` instance up in a compatibility interface suitable
        for use with `CookieJar` operations.
        """

        def __init__(self, request: Request) -> None:
            super().__init__(
                url=str(request.url),
                headers=dict(request.headers),
                method=request.method,
            )
            self.request = request

        def add_unredirected_header(self, key: str, value: str) -> None:
            super().add_unredirected_header(key, value)
            # Mirror the header onto the real Request being sent.
            self.request.headers[key] = value

    class _CookieCompatResponse:
        """
        Wraps a `Request` instance up in a compatibility interface suitable
        for use with `CookieJar` operations.
        """

        def __init__(self, response: Response):
            self.response = response

        def info(self) -> email.message.Message:
            info = email.message.Message()
            for key, value in self.response.headers.items():
                # Message allows duplicate header names, preserving
                # multiple Set-Cookie entries.
                info[key] = value
            return info
class Response(object):
    """HTTP response backed by a completed :class:`pycurl.Curl` transfer.

    Lazily parses status code, headers, cookies and body from the buffers
    that curl wrote into during the request.
    """

    def __init__(self, url, curl_opener, body_output, headers_output,
                 request=None, cookies=None):
        """
        Arguments:
        :param url: resource url
        :param curl_opener: :class:`pycurl.Curl` object
        :param body_output: :StringIO instance
        :param headers_output: :StringIO instance
        :param request: :class:`Request` instance
        :param cookies_jar: :class:`CookieJar` instance
        """
        # Requested url
        self._request_url = url
        self._url = None

        # Request object
        self._request = request

        # Response headers
        self._headers = None

        # Cookies dictionary
        self._cookies = None
        if isinstance(cookies, CookieJar):
            self._cookies_jar = cookies
        elif isinstance(cookies, (tuple, dict)):
            # Convert plain tuple/dict cookies into a CookieJar.
            self._cookies_jar = to_cookiejar(cookies)
        else:
            self._cookies_jar = None

        # Seconds from request start to finish
        self.request_time = None

        self._curl_opener = curl_opener

        # StringIO object for response body
        self._body_output = body_output

        # StringIO object for response headers
        self._headers_output = headers_output

        # :Response status code
        self._status_code = None

        # Unzipped and decoded response body
        self._content = None

        # Redirects history
        self._history = []

        # list of parsed headers blocks (one block per redirect hop)
        self._headers_history = []

        # get data from curl_opener.getinfo before curl_opener.close()
        self._response_info = dict()
        self._get_curl_info()

        # not good call methods in __init__
        # it's really very BAD
        # DO NOT UNCOMMENT
        # self._parse_headers_raw()

    def __repr__(self):
        return "<%s: %s >" % (self.__class__.__name__, self.status_code)

    def _get_curl_info(self):
        """Extract info from `self._curl_opener` with getinfo()

        Populates self._response_info and caches the effective URL.
        Failures for individual fields are logged and skipped.
        """
        for field, value in list(CURL_INFO_MAP.items()):
            try:
                field_data = self._curl_opener.getinfo(value)
            except Exception as e:
                logger.warn(e)
                continue
            else:
                self._response_info[field] = field_data
        # EFFECTIVE_URL reflects the final URL after any redirects.
        self._url = self._response_info.get("EFFECTIVE_URL")
        return self._response_info

    @property
    def request(self):
        # The originating Request instance (may be None).
        return self._request

    @property
    def url(self):
        # Final (post-redirect) URL; re-query curl info if not yet cached.
        if not self._url:
            self._get_curl_info()
        return self._url

    @property
    def status_code(self):
        # Lazily fetched from curl and cached.
        if not self._status_code:
            self._status_code = int(self._curl_opener.getinfo(pycurl.HTTP_CODE))
        return self._status_code

    def raise_for_status(self):
        """Raise :class:`HTTPError` for 4xx/5xx responses; no-op otherwise."""
        http_error_msg = ''
        if 400 <= self.status_code < 500:
            http_error_msg = '%s Client Error' % (self.status_code)
        elif 500 <= self.status_code < 600:
            http_error_msg = '%s Server Error' % (self.status_code)
        if http_error_msg:
            raise HTTPError(code=self.status_code, message=http_error_msg,
                            response=self)

    @property
    def cookiesjar(self):
        """Returns cookie jar object
        """
        if not self._cookies_jar:
            self._cookies_jar = CookieJar()
            # add cookies from self._cookies
        return self._cookies_jar

    @property
    def content(self):
        """Returns decoded self._content

        Gunzips the raw body only when curl itself did not decompress it
        (i.e. libcurl was built without zlib).
        """
        import zlib
        if not self._content:
            if 'gzip' in self.headers.get('Content-Encoding', '') and \
                    'zlib' not in pycurl.version:
                try:
                    self._content = decode_gzip(self._body_output.getvalue())
                except zlib.error as e:
                    raise
            else:
                self._content = self._body_output.getvalue()
        return self._content

    @property
    def apparent_encoding(self):
        """The apparent encoding, provided by the chardet library"""
        return chardet.detect(self.content)['encoding']

    @property
    def text(self):
        """Content of the response, in unicode.

        If Response.encoding is None, encoding will be guessed using
        ``chardet``.
        The encoding of the response content is determined based solely on HTTP
        headers, following RFC 2616 to the letter. If you can take advantage of
        non-HTTP knowledge to make a better guess at the encoding, you should
        set ``r.encoding`` appropriately before accessing this property.
        """
        # Try charset from content-type
        content = None
        encoding = None

        if not self.content:
            return str('')

        # Fallback to auto-detected encoding.
        if encoding is None:
            encoding = self.apparent_encoding

        # Decode unicode from given encoding.
        try:
            content = str(self.content, encoding, errors='replace')
        except (LookupError, TypeError):
            # A LookupError is raised if the encoding was not found which could
            # indicate a misspelling or similar mistake.
            #
            # A TypeError can be raised if encoding is None
            #
            # So we try blindly encoding.
            content = str(self.content, errors='replace')
        return content

    def json(self):
        """Returns the json-encoded content of a response

        Returns None (rather than raising) if the body is not valid JSON.
        """
        try:
            return json.loads(self.content)
        except ValueError:
            return None

    @staticmethod
    def _split_headers_blocks(raw_headers):
        # Split the raw header stream into per-response blocks: each block
        # starts at an "HTTP/..." status line (one block per redirect hop).
        i = 0
        blocks = []
        raw_headers = raw_headers.decode('utf-8')
        for item in raw_headers.strip().split("\r\n"):
            if item.startswith("HTTP"):
                blocks.append([item])
                i = len(blocks) - 1
            elif item:
                blocks[i].append(item)
        # print(repr(blocks))
        return blocks

    def _parse_headers_raw(self):
        """Parse response headers and save as instance vars
        """

        def parse_header_block(raw_block):
            r"""Parse headers block
            Arguments:
            - `block`: raw header block
            Returns:
            - `headers_list`:
            """
            block_headers = []
            for header in raw_block:
                if not header:
                    continue
                elif not header.startswith("HTTP"):
                    # "Field: value" header line.
                    field, value = [u.strip() for u in header.split(":", 1)]
                    if field.startswith("Location"):
                        # maybe not good
                        if not value.startswith("http"):
                            # Relative redirect target: resolve against our URL.
                            value = urljoin(self.url, value)
                        self._history.append(value)
                    if value[:1] == value[-1:] == '"':
                        value = value[1:-1]  # strip "
                    block_headers.append((field, value.strip()))
                elif header.startswith("HTTP"):
                    # extract version, code, message from first header
                    try:
                        version, code, message = HTTP_GENERAL_RESPONSE_HEADER.findall(header)[0]
                    except Exception as e:
                        logger.warn(e)
                        continue
                    else:
                        block_headers.append((version, code, message))
                else:
                    # raise ValueError("Wrong header field")
                    pass
            return block_headers

        raw_headers = self._headers_output.getvalue()

        for raw_block in self._split_headers_blocks(raw_headers):
            block = parse_header_block(raw_block)
            self._headers_history.append(block)

        # Headers of the *final* hop win; entry [0] is the status line tuple.
        last_header = self._headers_history[-1]
        self._headers = CaseInsensitiveDict(last_header[1:])

        if not self._history:
            self._history.append(self.url)

    def parse_cookies(self):
        """Collect Set-Cookie headers from every redirect hop into a dict.

        Also feeds the morsels into self._cookies_jar when one was supplied.
        """
        from http.cookies import SimpleCookie, CookieError
        if not self._headers_history:
            self._parse_headers_raw()
        # Get cookies from endpoint
        cookies = []
        for header in chain(*self._headers_history):
            if len(header) > 2:
                # Status-line tuples are (version, code, message); skip them.
                continue
            key, value = header[0], header[1]
            if key.lower().startswith("set-cookie"):
                try:
                    cookie = SimpleCookie()
                    cookie.load(value)
                    cookies.extend(list(cookie.values()))
                    # update cookie jar
                    for morsel in list(cookie.values()):
                        if isinstance(self._cookies_jar, CookieJar):
                            self._cookies_jar.set_cookie(morsel_to_cookie(morsel))
                except CookieError as e:
                    logger.warn(e)
        self._cookies = dict([(cookie.key, cookie.value) for cookie in cookies])
        return self._cookies

    @property
    def headers(self):
        """Returns response headers
        """
        if not self._headers:
            self._parse_headers_raw()
        return self._headers

    @property
    def cookies(self):
        """Returns list of BaseCookie object
        All cookies in list are ``Cookie.Morsel`` instance
        :return self._cookies: cookies list
        """
        if not self._cookies:
            self.parse_cookies()
        return self._cookies

    @property
    def history(self):
        """Returns redirects history list
        :return: list of `Response` objects
        """
        if not self._history:
            self._parse_headers_raw()
        return self._history
class KissmangaFeed(Feed):
    """RSS feed scraper for kissmanga.com chapter updates.

    On a Cloudflare challenge (HTTP 503) it drives a remote Selenium
    browser to obtain valid clearance cookies and a matching User-Agent,
    then reuses those for plain `requests` calls.
    """

    def __init__(self):
        super().__init__()
        # Cookie jar + headers shared by all subsequent requests.
        self.cookies = CookieJar()
        self.headers = {}
        self.NAME = 'Kissmanga'
        self.TITLE = 'Kissmanga'
        self.LINK = 'http://kissmanga.com'
        self.DESCRIPTION = 'Anton\'s manga feed'
        self.LANGUAGE = 'ja-US'
        self.COPYRIGHT = 'lol sure'
        self.IMAGE = 'http://safebooru.org//images/2413/3a64a624c773adaea45afc9ee883d08d764c3336.png?2513303'
        self._validate()

    def get_schedule(self) -> IntervalTrigger:
        # Daily run with up to +/-6h of jitter to avoid a fixed scrape time.
        return IntervalTrigger(hours=24, jitter=60*60*6)

    def run(self) -> [RSSItem]:
        """Refresh cookies, then scrape every configured title."""
        self.synchronize_cookies()
        results = []
        for title in manga:
            results.extend(self.grab_page(title))
        return results

    def grab_page(self, title: str) -> [RSSItem]:
        """Scrape one manga page; return RSS items for unseen chapters.

        :param title: manga slug as used in kissmanga.com URLs
        """
        results = []
        url = 'http://kissmanga.com/Manga/{}'.format(title)
        r = requests.get(url, cookies=self.cookies, headers=self.headers)
        if r.status_code != 200:
            results.append(failed_scraper(url))
            # BUGFIX: bail out here. The original fell through and tried to
            # parse the error page, crashing when no chapterList was found.
            return results
        soup = BeautifulSoup(r.text, 'lxml')
        chapter_list = soup.find(attrs={'class': 'chapterList'})
        links = chapter_list.find_all(
            lambda tag: tag.name == 'a' and
            tag['href'].startswith('/Manga/{}/'.format(title)))
        urls = [link['href'] for link in links]
        # check_urls reports, per URL, whether we already emitted it.
        already_seen = check_urls(title, urls)
        new_links = []
        new_urls = []
        for link, url, seen in zip(links, urls, already_seen):
            if not seen:
                new_links.append(link)
                new_urls.append(url)
        for link in new_links:
            results.append(RSSItem(
                title='{} - New Chapter'.format(title),
                description=link['title'],
                link='http://kissmanga.com/{}'.format(link['href']),
                author='Shameimaru Aya',
                category=title,
            ))
        # Remember the newly seen URLs so they are not reported again.
        insert_urls(title, new_urls)
        return results

    def synchronize_cookies(self):
        """Refresh Cloudflare clearance cookies via Selenium when needed.

        Only triggers on a 503 challenge response; otherwise the current
        cookies are still valid and nothing happens.
        """
        resp = requests.get('http://www.kissmanga.com', cookies=self.cookies)
        if resp.status_code == 503:
            driver = webdriver.Remote(
                command_executor='http://selenium:4444/wd/hub',
                desired_capabilities=DesiredCapabilities.HTMLUNITWITHJS
            )
            # First load triggers the JS challenge; wait for it to resolve,
            # then reload to pick up the clearance cookie.
            driver.get('http://www.kissmanga.com')
            time.sleep(5)
            try:
                driver.get('http://www.kissmanga.com')
            except WebDriverException:
                pass
            driver_cookies = driver.get_cookies()
            driver_agent = driver.execute_script("return navigator.userAgent")
            driver.close()
            for cookie in driver_cookies:
                ck = Cookie(
                    name=cookie['name'],
                    value=cookie['value'],
                    domain=cookie['domain'],
                    path=cookie['path'],
                    secure=cookie['secure'],
                    # BUGFIX: was rest=False; http.cookiejar.Cookie expects a
                    # dict here and membership tests on it would raise.
                    rest={},
                    version=0,
                    port=None,
                    port_specified=False,
                    domain_specified=False,
                    domain_initial_dot=False,
                    path_specified=True,
                    # BUGFIX: `expires` must be an absolute epoch timestamp.
                    # The original passed a bare duration (~1970 + 1 year),
                    # i.e. a cookie that was already expired.
                    expires=cookie['expiry'] if 'expiry' in cookie
                    else time.time() + 12 * 30 * 24 * 60 * 60,
                    discard=True,
                    comment=None,
                    comment_url=None,
                    rfc2109=False
                )
                self.cookies.set_cookie(ck)
            # Cloudflare ties clearance cookies to the UA that earned them.
            self.headers = {'User-Agent': driver_agent}
def to_pycookiejar(QtCookiejar): cj = CookieJar() for c in QtCookiejar.allCookies(): cj.set_cookie(to_py_cookie(c)) return cj
class Swedbank(object):
    """Minimal client for Swedbank's mobile-app REST API.

    Impersonates one of the official iOS apps (see BANKS) and exposes
    login, account overview and transaction history.
    """

    # Per-bank app id and User-Agent string, keyed by the module-level
    # bank constants (SWEDBANK, SPARBANKEN, ...).
    BANKS = {
        SWEDBANK: {
            "id": "HithYAGrzi8fu73j",
            "u-a": "SwedbankMOBPrivateIOS/3.9.0_(iOS;_8.0.2)_Apple/iPhone5,2"
        },
        SPARBANKEN: {
            "id": "9iZSu74jfDFaTdPd",
            "u-a": "SavingbankMOBPrivateIOS/3.9.0_(iOS;_8.0.2)_Apple/iPhone5,2"
        },
        SWEDBANK_UNG: {
            "id": "IV4Wrt2VZtyYjfpW",
            "u-a": "SwedbankMOBYouthIOS/1.6.0_(iOS;_8.0.2)_Apple/iPhone5,2"
        },
        SPARBANKEN_UNG: {
            "id": "BrGkZQR89rEbFwnj",
            "u-a": "SavingbankMOBYouthIOS/1.6.0_(iOS;_8.0.2)_Apple/iPhone5,2"
        },
        SWEDBANK_FORETAG: {
            "id": "v0RVbFGKMXz7U4Eb",
            "u-a": "SwedbankMOBCorporateIOS/1.5.0_(iOS;_8.0.2)_Apple/iPhone5,2"
        },
        SPARBANKEN_FORETAG: {
            "id": "JPf1VxiskNdFSclr",
            "u-a": "SavingbankMOBCorporateIOS/1.5.0_(iOS;_8.0.2)_Apple/iPhone5,2"
        }
    }

    def __init__(self, username, password, bank=SWEDBANK):
        """ Set default stuff """
        self.data = ""
        self.pch = None          # urllib opener, created lazily in request()
        self.authkey = None      # cached Authorization header value
        self.cj = CookieJar()
        self.profile = None
        self.account = None
        self.useragent = None
        self.bankid = None
        self.login(username, password, bank)

    def get_authkey(self):
        """Return (and cache) the base64 'bankid:uuid' Authorization token."""
        if self.authkey is None:
            data = "%s:%s" % (self.bankid, uuid.uuid4())
            self.authkey = base64.b64encode(data.encode("utf-8")).decode(
                "utf-8")
        return self.authkey

    def get_dsid(self):
        """Generate a pseudo-random 8-character dsid request token."""
        data = "%s%s" % (random.randint(0, 99999), random.randint(0, 99999))
        hashvalue = hashlib.sha1(data.encode("utf-8")).hexdigest()[:8]
        dsid = "%s%s" % (hashvalue[:4], hashvalue[4:].upper())
        # BUGFIX: the original shuffled a throwaway temporary
        # (random.shuffle(list(dsid))) and returned dsid unshuffled.
        # random.shuffle works in place, so shuffle a named list and join it.
        chars = list(dsid)
        random.shuffle(chars)
        return ''.join(chars)

    def request(self, url, post=None, method="GET"):
        """ Make the request

        :param url: API path relative to the v1 endpoint
        :param post: JSON string body; its presence selects a POST-style body
        :param method: HTTP verb to force on the request
        Stores the decoded response body in self.data.
        """
        dsid = self.get_dsid()
        baseurl = "https://auth.api.swedbank.se/TDE_DAP_Portal_REST_WEB/api/v1/%s?dsid=%s" % (
            url, dsid)
        if self.pch is None:
            # Lazily build one opener that carries our cookie jar.
            self.pch = build_opener(HTTPCookieProcessor(self.cj))
        if post:
            post = bytearray(post, "utf-8")
            request = Request(baseurl, data=post)
            request.add_header("Content-Type", "application/json")
        else:
            request = Request(baseurl)
        request.add_header("User-Agent", self.useragent)
        request.add_header("Authorization", self.get_authkey())
        request.add_header("Accept", "*/*")
        request.add_header("Accept-Language", "sv-se")
        request.add_header("Connection", "keep-alive")
        request.add_header("Proxy-Connection", "keep-alive")
        # The dsid must be sent both as query parameter and as cookie.
        self.cj.set_cookie(
            Cookie(version=0,
                   name='dsid',
                   value=dsid,
                   port=None,
                   port_specified=False,
                   domain='.api.swedbank.se',
                   domain_specified=False,
                   domain_initial_dot=False,
                   path='/',
                   path_specified=True,
                   secure=False,
                   expires=None,
                   discard=True,
                   comment=None,
                   comment_url=None,
                   rest={'HttpsOnly': None},
                   rfc2109=False))
        request.get_method = lambda: method
        tmp = self.pch.open(request)
        self.data = tmp.read().decode("utf8")

    def login(self, user, passwd, bank):
        """ Login

        Authenticates with a personal code and selects the first profile.
        Returns True on success, False on any failure (details are logged).
        """
        logger.info("login...")
        if bank not in self.BANKS:
            logger.error("Can't find that bank.")
            return False
        self.useragent = self.BANKS[bank]["u-a"]
        self.bankid = self.BANKS[bank]["id"]
        login = json.dumps(
            {"userId": user,
             "password": passwd,
             "useEasyLogin": False,
             "generateEasyLoginId": False})
        try:
            self.request("identification/personalcode", post=login,
                         method="POST")
        except HTTPError as e:
            error = json.loads(e.read().decode("utf8"))
            logger.error(error["errorMessages"]["fields"][0]["message"])
            return False
        try:
            self.request("profile/")
        except HTTPError as e:
            error = json.loads(e.read().decode("utf8"))
            logger.error(error["errorMessages"]["general"][0]["message"])
            return False
        profile = json.loads(self.getdata())
        if len(profile["banks"]) == 0:
            logger.error("Using wrong bank? Can't find any bank info.")
            return False
        try:
            # Private profiles take precedence; fall back to corporate.
            self.profile = profile["banks"][0]["privateProfile"]["id"]
        except KeyError:
            self.profile = profile['banks'][0]['corporateProfiles'][0]["id"]
        try:
            self.request("profile/%s" % self.profile, method="POST")
        except HTTPError as e:
            error = json.loads(e.read().decode("utf8"))
            logger.error(error["errorMessages"]["general"][0]["message"])
            return False
        return True

    def accounts(self):
        """ Accounts

        Returns a list of {account name: balance} dicts from the overview,
        and remembers the first account id for history().
        """
        logger.info("Fetching data...")
        try:
            self.request("engagement/overview")
        except HTTPError as e:
            error = json.loads(e.read().decode("utf8"))
            logger.error(error["errorMessages"]["general"][0]["message"])
            return
        overview = json.loads(self.getdata())
        overviewl = reversed(list(overview))
        ret = list()
        for i in overviewl:
            if len(overview[i]) > 0:
                for n in overview[i]:
                    if self.account is None and "id" in n:
                        self.account = n["id"]
                    if n.get('balance'):
                        ret.append({n['name']: n['balance']})
                    elif n.get('availableAmount', None):
                        ret.append({n['name']: n['availableAmount']})
                    else:
                        logger.error("Unable to parse %s", n)
        return ret

    def history(self):
        """ History

        Returns [date, description, amount] triples for the remembered
        account (set by accounts()).
        """
        logger.info("Transactions:")
        try:
            logger.debug("Account: %s", self.account)
            self.request("engagement/transactions/%s" % self.account)
        except HTTPError as e:
            error = json.loads(e.read().decode("utf8"))
            logger.error(error["errorMessages"]["general"][0]["message"])
            return
        transactions = json.loads(self.getdata())["transactions"]
        ret = list()
        for i in transactions:
            ret.append([i["date"], i["description"], i["amount"]])
        return ret

    @staticmethod
    def banks():
        """Return the list of supported bank constants."""
        return list(Swedbank.BANKS.keys())

    def getdata(self):
        """ Get the response data """
        return self.data
class pyGoogleTrendsCsvDownloader(object):
    '''
    Google Trends Downloader.
    Recommended usage:
    from pyGoogleTrendsCsvDownloader import pyGoogleTrendsCsvDownloader
    r = pyGoogleTrendsCsvDownloader(username, password)
    r.get_csv_data(cat='0-958', geo='US-ME-500')
    '''

    def __init__(self, username, password, proxy=None):
        '''
        Provide login and password to be used to connect to Google Trends
        All immutable system variables are also defined here
        '''
        # The amount of time (in secs) that the script should wait before
        # making a request. This can be used to throttle the downloading
        # speed to avoid hitting servers too hard. It is further randomized.
        self.download_delay = 2

        self.service = "trendspro"
        self.url_service = "http://www.google.com/trends/"
        self.url_download = 'https://www.google.com/trends/trendsReport?'

        self.login_params = {}
        # These headers are necessary, otherwise Google will flag the
        # request at your account level
        self.headers = [('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.117 Safari/537.36'),
                        ("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"),
                        ("Accept-Language", "en-gb,en;q=0.8"),
                        ("Accept-Encoding", "gzip,deflate,sdch"),
                        ("referer", "https://www.google.com/trends/explore"),
                        ("pragma", "no-cache"),
                        ("cache-control", "no-cache"),
                        ]
        self.url_login = '******'+self.service+'&passive=1209600&continue='+self.url_service+'&followup='+self.url_service
        self.url_authenticate = 'https://accounts.google.com/accounts/ServiceLoginAuth'
        self.proxy = proxy
        self._authenticate(username, password)

    def _authenticate(self, username, password):
        '''
        Authenticate to Google:
        1 - make a GET request to the Login webpage so we can get the login form
        2 - make a POST request with email, password and login form input values

        Raises AuthFailedException when the form cannot be parsed or the
        credentials are rejected.
        '''
        # Make sure we get CSV results in English
        ck1 = Cookie(version=0, name='I4SUserLocale', value='en_US',
                     port=None, port_specified=False, domain='.google.com',
                     domain_specified=False, domain_initial_dot=False,
                     path='', path_specified=False, secure=False,
                     expires=None, discard=False, comment=None,
                     comment_url=None, rest=None)

        # This cookie is now mandatory
        # Not sure what the value represents but too many queries from the
        # same value lead to a Quota Exceeded error.
        # random_six_char = ''.join(random.choice('0123456789abcdef') for n in range(6))
        ck2 = Cookie(version=0, name='PREF', value='0000',
                     port=None, port_specified=False, domain='.google.com',
                     domain_specified=False, domain_initial_dot=False,
                     path='', path_specified=False, secure=False,
                     expires=None, discard=False, comment=None,
                     comment_url=None, rest=None)

        self.cj = CookieJar()
        self.cj.set_cookie(ck1)
        self.cj.set_cookie(ck2)

        if not self.proxy:
            self.opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(self.cj))
        else:
            proxy = urllib.request.ProxyHandler({'http': self.proxy,
                                                 'https': self.proxy})
            self.opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(self.cj), proxy)
        self.opener.addheaders = self.headers

        # Get all of the login form input values
        find_inputs = etree.XPath("//form[@id='gaia_loginform']//input")
        resp = self.opener.open(self.url_login)
        data = self.read_gzipped_response(resp).decode()

        try:
            xmlTree = etree.fromstring(data, parser=html.HTMLParser(recover=True, remove_comments=True))
            for form_input in find_inputs(xmlTree):
                name = form_input.get('name')
                if name:
                    name = name.encode('utf8')
                    value = form_input.get('value', '').encode('utf8')
                    self.login_params[name] = value
        except Exception as exc:
            # BUGFIX: was a bare `except:` which also swallowed
            # KeyboardInterrupt/SystemExit; narrow it and chain the cause.
            raise AuthFailedException(("Exception while parsing: %s\n" % traceback.format_exc())) from exc

        self.login_params["Email".encode('utf8')] = username.encode('utf8')
        self.login_params["Passwd".encode('utf8')] = password.encode('utf8')

        params = urllib.parse.urlencode(self.login_params)
        auth_resp = self.opener.open(self.url_authenticate, params.encode())

        # Testing whether Authentication was a success
        # I noticed that a correct auth sets a few cookies
        if not self.is_authentication_successfull(auth_resp):
            raise AuthFailedException('Warning: Authentication failed for user %s' % username)

    def is_authentication_successfull(self, response):
        '''
        Arbitrary way of us knowing whether the authentication succeeded or
        not: we look for a SSID cookie-set header value.
        I noticed that the 4 mandatory cookies were:
        - SID
        - SSID
        - HSID
        - PREF (but does not need to be set)
        '''
        if response:
            for h in response.headers._headers:
                if 'SSID' in h[1]:
                    return True
        return False

    def is_quota_exceeded(self, response):
        # TODO: double check that the check for the content-disposition
        # is correct
        # A CSV download arrives as an attachment; its absence means Google
        # served an error/quota page instead.
        if 'Content-Disposition' in [h[0] for h in response.headers._headers]:
            return False
        return True

    def read_gzipped_response(self, response):
        '''
        Since we are adding gzip to our http request Google can answer
        with gzipped data that needs uncompressing before handling.
        This method returns the text content of a Http response.
        '''
        if response.info().get('Content-Encoding') == 'gzip':
            f = gzip.decompress(response.read())
            content = f
        else:
            content = response.read()
        return content

    def get_csv_data(self, **kwargs):
        '''
        Download CSV reports
        '''
        time.sleep(self.download_delay)

        params = {
            'hl': 'en-us',
            'export': 1
        }
        params.update(kwargs)

        # Silly python with the urlencode method
        params = urllib.parse.urlencode(params).replace("+", "%20")
        response = self.opener.open(self.url_download + params)

        # Make sure quotas are not exceeded ;)
        if self.is_quota_exceeded(response):
            raise QuotaExceededException()

        return self.read_gzipped_response(response)