def pull_contest_zip(contest_id):
    """Pull contest file (so far can be .zip or .csv file)."""
    # try pickled-cookies method
    cookies = cj_from_pickle("pickled_cookies_works.txt")
    if cookies:
        result = setup_session(contest_id, cookies)
        logger.debug("type(result): {}".format(type(result)))

        if result is False:
            logger.debug("Broken from pickle method")
        else:
            logger.debug("pickle method worked!!")
            return result

    # try browsercookie method
    cookies = browsercookie.chrome()
    for c in cookies:
        if "draft" not in c.domain:
            cookies.clear(c.domain, c.path, c.name)
        else:
            if c.expires:
                # Chrome stores cookie expiry as WebKit-epoch microseconds,
                # so this math is required to get a Unix timestamp
                new_expiry = c.expires / 1000000
                new_expiry -= 11644473600
                c.expires = new_expiry

    result = setup_session(contest_id, cookies)
    logger.debug("type(result): {}".format(type(result)))
    if result:
        return result

    # use selenium to refresh cookies
    use_selenium(contest_id)

    # try browsercookie method again
    cookies = browsercookie.chrome()
    for c in cookies:
        if "draft" not in c.domain:
            cookies.clear(c.domain, c.path, c.name)
        else:
            if c.expires:
                # Chrome stores the expiry (the expires_utc column in its
                # cookies table) as WebKit-epoch microseconds:
                # divide by 1000000, subtract 11644473600, and the result
                # is a Unix timestamp.
                new_expiry = c.expires / 1000000
                new_expiry -= 11644473600
                c.expires = new_expiry

    result = setup_session(contest_id, cookies)
    logger.debug("type(result): {}".format(type(result)))
    if result is False:
        logger.debug("Broken from SECOND browsercookie method")
    else:
        logger.debug("SECOND browsercookie method worked!!")
        return result
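# The expiry math above converts Chrome's WebKit-epoch timestamps (microseconds
# since 1601-01-01 UTC) into Unix timestamps. A minimal standalone sketch of
# that conversion, with a hypothetical helper name:
from datetime import datetime, timezone


def webkit_to_unix(webkit_microseconds):
    """Convert a Chrome expires_utc value (microseconds since 1601-01-01 UTC)
    to a Unix timestamp (seconds since 1970-01-01 UTC)."""
    return webkit_microseconds / 1000000 - 11644473600


# Example: a WebKit timestamp of 13300000000000000 microseconds
print(datetime.fromtimestamp(webkit_to_unix(13300000000000000), tz=timezone.utc))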
def load_browser_cookies(self):
    # load cookies from Chrome
    jar = self.jars['chrome']
    chrome_cookiejar = browsercookie.chrome()
    for cookie in chrome_cookiejar:
        jar.set_cookie(cookie)
    # load cookies from Firefox
    jar = self.jars['firefox']
    firefox_cookiejar = browsercookie.firefox()
    for cookie in firefox_cookiejar:
        jar.set_cookie(cookie)
def get_data(weixin):
    '''
    filter:
    hasDeal: false
    keyName: 范冰冰
    order: relation
    nonce: 903ae515b
    xyz: a2e23ef3362c8e341148a64084386c1b
    '''
    nonce = get_nonce()
    xyz = get_xzy(nonce)
    print(xyz)
    url = 'https://www.newrank.cn/xdnphb/data/weixinuser/searchWeixinDataByCondition'
    data = {
        'filter': '',
        'hasDeal': 'false',
        'keyName': weixin,
        'order': 'relation',
        'nonce': nonce,
        'xyz': xyz,
    }
    print(data)
    cookies = browsercookie.chrome()
    response = requests.post(url, data=data, cookies=cookies)
    if response.status_code == 200:
        print(response.text)
    else:
        print('request error')
def get_chrome_cookie(websize):
    """
    Fetch the cookies for a site directly from the Chrome browser.
    :return: list of cookie dicts for the site's domain, or False if none found
    """
    domain = '.{}.{}'.format(
        tldextract.extract(websize).domain,
        tldextract.extract(websize).suffix)
    cookies = browsercookie.chrome()
    items = dict()
    for cookie in cookies:
        item = items.get(cookie.domain, [])
        item.append({
            'domain': cookie.domain,
            'expiry': cookie.expires,
            'path': cookie.path,
            'name': cookie.name,
            'secure': cookie.secure,
            'value': cookie.value
        })
        items[cookie.domain] = item
    data = items.get(domain, [])
    if not data:
        return False
    return data
def main():
    args = arguments.Args()
    if args.get(0) == 'firefox':
        puts('Grabbing cookies from Firefox')
        jar = browsercookie.firefox()
    elif args.get(0) == 'chrome':
        puts('Grabbing cookies from Chrome')
        jar = browsercookie.chrome()
    else:
        puts('Grabbing cookies from Firefox')
        jar = browsercookie.firefox()

    url = 'https://www.safaribooksonline.com/a/export/csv/'

    puts('\nWaiting for download to begin... (may take a while)')
    with blindspin.spinner():
        r = requests.get(url, stream=True, cookies=jar)
        total_size = int(r.headers.get('content-length', 0))

    filename = 'safari.csv'
    with open(filename, 'wb') as out_file:
        for chunk in progress.bar(r.iter_content(chunk_size=1024),
                                  expected_size=(total_size / 1024) + 1):
            if chunk:
                out_file.write(chunk)
                out_file.flush()
    puts('File saved to {filename}\n'.format(filename=filename))
def pull_soup_data(filename, ENDPOINT, ignore_file=False):
    """Either pull file from html or from file."""
    soup = None

    if ignore_file or not path.isfile(filename):
        print("{} does not exist. Pulling from endpoint [{}]".format(
            filename, ENDPOINT))

        # set cookies based on Chrome session
        cookies = browsercookie.chrome()

        # send GET request
        r = requests.get(ENDPOINT, cookies=cookies)
        status = r.status_code

        # if not successful, raise an exception
        if status != 200:
            raise Exception(
                'Requests status != 200. It is: {0}'.format(status))

        # dump html to file to avoid multiple requests
        with open(filename, 'w') as outfile:
            print(r.text, file=outfile)

        soup = BeautifulSoup(r.text, 'html5lib')
    else:
        print("File exists [{}]. Nice!".format(filename))
        # load html from file
        with open(filename, 'r') as html_file:
            soup = BeautifulSoup(html_file, 'html5lib')

    return soup
def __init__(self):
    cookiejar = browsercookie.chrome()
    self.cookie_dict = {}
    for cookie in cookiejar:
        if cookie.domain == '.dianping.com':
            self.cookie_dict[cookie.name] = cookie.value
    self.cookie_dict['navCtgScroll'] = '0'
def load_browser_cookies(self):
    # load cookies from the Chrome browser
    jar = self.jars['chrome']
    chrome_cookiejar = browsercookie.chrome()
    # firefox_cookiejar = browsercookie.firefox()
    for cookie in chrome_cookiejar:
        jar.set_cookie(cookie)
def getCookie(website_url, cookie_path):
    myNeedDomainDict = {}
    targetDomain = website_url.split('/')[-1]
    for cookie in browsercookie.chrome([cookie_path]):
        if targetDomain in cookie.domain:
            myNeedDomainDict[cookie.name] = cookie.value
    return myNeedDomainDict
def pull_contest_zip(filename, contest_id):
    contest_csv_url = 'https://www.draftkings.com/contest/exportfullstandingscsv/{0}'.format(
        contest_id)

    # ~/Library/Application Support/Google/Chrome/Default/Cookies
    # Uses Chrome's default cookies filepath by default
    # cookies = chrome_cookies(contest_csv_url, cookie_file='~/Library/Application Support/Google/Chrome/Default/Cookies')
    cookies = browsercookie.chrome()

    # retrieve exported contest csv
    r = requests.get(contest_csv_url, cookies=cookies)

    # request will be a zip file
    z = zipfile.ZipFile(io.BytesIO(r.content))

    for name in z.namelist():
        # csvfile = z.read(name)
        outfile = "{}_finished.txt".format(contest_id)
        z.extract(name)
        with z.open(name) as csvfile:
            print(name)
            print(csvfile)
            lines = io.TextIOWrapper(csvfile, newline='\r\n')
            cr = csv.reader(lines, delimiter=',')
            my_list = list(cr)
            return my_list
def __load_cookie_jar(self):
    if self.__browser == 'chrome':
        self.__cj = chrome()
    elif self.__browser == 'safari':
        self.__cj = safari()
    elif self.__browser == 'firefox':
        self.__cj = firefox()
    else:
        raise ValueError
def get_cookies():
    """
    Drive the browser by hand, then use browsercookie to read its cookies.
    :return:
    """
    ck = browsercookie.chrome()
    for i in ck:
        if 'taobao' in i.domain:
            session.cookies.set(i.name, i.value)
def __init__(self, delay=DEFAULT_DELAY, user_agent=DEFAULT_AGENT, proxies=None,
             num_retries=DEFAULT_RETRIES, timeout=DEFAULT_TIMEOUT, opener=None, cache=None):
    socket.setdefaulttimeout(timeout)
    self.throttle = Throttle(delay)
    self.user_agent = user_agent
    self.proxies = proxies
    self.num_tries = num_retries
    self.cache = cache
    self.opener = opener
    self.cj = browsercookie.chrome()
def load_browser_cookies(self):
    # cookies recorded by the Chrome browser
    chrome_jar = self.jars['chrome']
    for c in browsercookie.chrome():
        chrome_jar.set_cookie(c)
    # cookies recorded by the Firefox browser
    firefox_jar = self.jars['firefox']
    for c in browsercookie.firefox():
        firefox_jar.set_cookie(c)
def main():
    parser = argparse.ArgumentParser(description='Monitor clipboard and fetch urls matching regex')
    parser.add_argument('-f', "--file", action='store', help='output file path', default='list.list')

    group = parser.add_mutually_exclusive_group()
    group.add_argument('-m', "--match-repository", action='store',
                       help='file containing match patterns, each line one pattern', default='.regexlist')
    group.add_argument('-r', "--match-regex", action='store', help='Regex to match urls')

    group = parser.add_mutually_exclusive_group()
    group.add_argument('-F', '--firefox-cookies', action='store_true', default=False,
                       help='Use firefox cookies for title fetch requests')
    group.add_argument('-C', '--chrome-cookies', action='store_true', default=False,
                       help='Use Chrome cookies for title fetch requests')
    group.add_argument('-J', '--jar-cookies', action='store', default=False,
                       help='Use JAR file as cookies for title fetch requests')

    parser.add_argument('-t', '--notify-title',
                        required=(not set(sys.argv).isdisjoint(
                            ('-F', '--firefox-cookies', '-C', '--chrome-cookies', '-J', '--jar-cookies'))),
                        default=False, action='store_true',
                        help='Show page title in desktop notifications (slower notifications)')
    parser.add_argument('-n', '--notify',
                        required=(not set(sys.argv).isdisjoint(('-t', '--notify-title'))),
                        default=False, action='store_true',
                        help='Show desktop notifications')

    args = parser.parse_args()
    outfile = args.file

    if args.match_regex:
        mon_domains = [args.match_regex]
    else:
        mon_domains = tuple(open(args.match_repository, 'r'))

    if args.firefox_cookies:
        cj = browsercookie.firefox()
    elif args.chrome_cookies:
        cj = browsercookie.chrome()
    elif args.jar_cookies:
        cj = args.jar_cookies
    else:
        cj = None

    with ClipboardWatcher(mon_domains, 1., outfile, args.notify, args.notify_title, cj) as watcher:
        print("Start monitoring ...")
        watcher.start()
        while True:
            try:
                # print("Waiting for changed clipboard...")
                time.sleep(.1)
            except KeyboardInterrupt:
                print("Exit request received. cleaning up ...")
                watcher.stop()
                watcher.join()
                print("Finished")
                break
def crawl(self, page):
    cookie_k = []
    cookie_v = []
    for cookie in browsercookie.chrome():
        if 'www.lagou.com'.rfind(str(cookie.domain)) != -1:
            # print("cookie:" + str(cookie.domain))
            # print("cookie:" + str(cookie.name))
            cookie_k.append(cookie.name)
            cookie_v.append(cookie.value)
    cookies = dict(zip(cookie_k, cookie_v))

    head = dict()
    head['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8'
    head['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36'
    head['Accept'] = 'application/json, text/javascript, */*; q=0.01'
    head['Accept-Encoding'] = 'gzip, deflate, br'
    head['Accept-Language'] = 'zh-CN,zh;q=0.9'
    head['X-Requested-With'] = 'XMLHttpRequest'
    head['X-Anit-Forge-Token'] = 'None'
    head['X-Anit-Forge-Code'] = '0'
    head['X-Requested-With'] = 'XMLHttpRequest'
    head['Referer'] = 'https://www.lagou.com/jobs/list_%s?labelWords=&fromSearch=true&suginput=' % self.job
    head['Origin'] = 'https://www.lagou.com'

    data = dict()
    if page == '1':
        data['first'] = 'true'
    else:
        data['first'] = 'false'
    data['pn'] = page
    data['kd'] = self.job

    print("cookies:" + str(cookies))
    print("header:" + str(head))
    print('data:' + str(data))

    resp = requests.post(
        url="https://www.lagou.com/jobs/positionAjax.json?px=default&%s&needAddtionalResult=false" % self.city,
        cookies=cookies,
        headers=head,
        data=data)
    print("resp:" + str(resp.content))

    # result = json.loads(resp.content)['content']['positionResult']['result']
    if 'success' in json.loads(resp.content):
        result = json.loads(
            resp.content)['content']['positionResult']['result']
        for r in result:
            self.writeExcel(r)
        # print("excelName:"+self.excelName)
        self.book.save(self.excelName)
    else:
        print("error:" + json.loads(resp.content)['msg'])
def load_browser_cookies(self):
    # load cookies from the Chrome browser
    jar = self.jars['chrome']
    chrome_cookiejar = browsercookie.chrome()
    for cookie in chrome_cookiejar:
        jar.set_cookie(cookie)
    # load cookies from the Firefox browser
    jar = self.jars['firefox']
    firefox_cookiejar = browsercookie.firefox()
    for cookie in firefox_cookiejar:
        jar.set_cookie(cookie)
def login_with_browser(self):
    logging.info("Try loading Chrome Cookie")
    cookies = browsercookie.chrome()
    for i in cookies:
        i.expires = None
    self.s.cookies = cookies
    if "欢迎您" in self.s.get("http://portal.uestc.edu.cn/").text:
        self.logger.info("login success")
        return True
    else:
        self.logger.info("login failed")
        return False
def __init__(self, name=None, **kwargs):
    cookiejar = browsercookie.chrome()
    self.cookie_dict = {}
    for cookie in cookiejar:
        if cookie.domain == '.qidian.com':
            if cookie.name in ['_csrfToken', 'e1', 'e2', 'newstatisticUUID', 'ywguid', 'ywkey']:
                self.cookie_dict[cookie.name] = cookie.value
def load_broser_cookies(self):
    # iterate over all Chrome cookies and add them to the jar
    jar = self.jars['chrome']
    chrome_cookiejar = browsercookie.chrome()
    for cookie in chrome_cookiejar:
        jar.set_cookie(cookie)
    # iterate over all Firefox cookies and add them to the jar
    jar = self.jars['firefox']
    firefox_cookiejar = browsercookie.firefox()
    for cookie in firefox_cookiejar:
        jar.set_cookie(cookie)
def load_browser_cookies(self):
    # load cookies from the Chrome browser
    jar = self.jars['chrome']
    chrome_cookiejar = browsercookie.chrome()
    for cookie in chrome_cookiejar:
        jar.set_cookie(cookie)
    # load cookies from the Firefox browser
    jar = self.jars['firefox']
    firefox_cookiejar = browsercookie.firefox()
    for cookie in firefox_cookiejar:
        jar.set_cookie(cookie)
def get_page_content_2(url):
    '''
    pip install browsercookie2
    s = star.net.get_page_content_2('www.baidu.com')
    :param url:
    :return:
    '''
    import browsercookie
    cj = browsercookie.chrome()
    page = requests.get(url, cookies=cj)
    return page.content
def load_browser_cookies(self):
    # load the cookies from the Chrome browser
    jar = self.jars['chrome']
    chrome_cookiejar = browsercookie.chrome()
    for cookie in chrome_cookiejar:
        jar.set_cookie(cookie)
    # load the cookies from the Firefox browser
    jar = self.jars['firefox']
    firefox_cookiejar = browsercookie.firefox()
    for cookie in firefox_cookiejar:
        jar.set_cookie(cookie)
def __init__(self):
    cookiejar = browsercookie.chrome()  # get the cookies stored by the Chrome browser
    self.cookie_dict = {}  # dict holding the qidian.com cookies
    # iterate over all Chrome cookies and keep the ones for qidian.com
    for cookie in cookiejar:
        if cookie.domain == ".qidian.com":  # domain is qidian.com
            if cookie.name in ["_csrfToken", "e1", "e2", "newstatisticUUID", "ywguid", "ywkey"]:
                self.cookie_dict[cookie.name] = cookie.value
def __init__(self):
    # browsercookie.chrome() returns a cookiejar.CookieJar instance
    cookiejar = browsercookie.chrome()
    self.cookie_dict = {}
    # a CookieJar is iterable; each item is a Cookie object like <Cookie key=value domain>,
    # whose attributes (cookie.name, cookie.value, cookie.domain) can be read directly
    for cookie in cookiejar:
        if cookie.domain == ".qidian.com":
            if cookie.name in ["_csrfToken", "newstatisticUUID", "ywkey", "ywguid", "ywopenid", "e1", "e2"]:
                self.cookie_dict[cookie.name] = cookie.value
def __init__(self):
    self.gsongs = set()
    self.spsongs = set()
    with open('creds.json') as f:
        self.creds = json.loads(f.read())
    self.cookie_jar = browsercookie.chrome()
    self.gapi = Mobileclient()
    self.glogged_in = self.gapi.login(self.creds['gmusic_username'],
                                      self.creds['gmusic_password'],
                                      Mobileclient.FROM_MAC_ADDRESS)
    self.spcc = spoauth2.SpotifyClientCredentials(client_id=self.creds['spotify_client_id'],
                                                  client_secret=self.creds['spotify_client_secret'])
    self.spapi = spotipy.Spotify(auth=self.spcc.get_access_token())
    self.force_load_gmusic_playlist()
    self.load_spotify_playlist()
def load_browser_cookies(self): """ 使用self.jars['chrome']和self.jars['firefox']从默认字典中获得两个CookieJar对象, 然后调用browsercookie的chrome和firefox方法,分别获取两个浏览器中的Cookie,将它们填入各自的CookieJar对象中。 """ # 加载Chrome 浏览器中的Cookie jar = self.jars['chrome'] chrome_cookiejar = browsercookie.chrome() for cookie in chrome_cookiejar: jar.set_cookie(cookie) # 加载Firefox 浏览器中的cookie jar = self.jars['firefox'] firefox_cookiejar = browsercookie.firefox() for cookie in firefox_cookiejar: jar.set_cookie(cookie)
def download_salary_csv(filename, csv_url):
    """Given a filename and CSV URL, request download of CSV file and save to filename."""
    # set cookies based on Chrome session
    cookies = browsercookie.chrome()

    # send GET request
    r = requests.get(csv_url, cookies=cookies)
    status = r.status_code

    # if not successful, raise an exception
    if status != 200:
        raise Exception('Requests status != 200. It is: {0}'.format(status))

    # dump response text to file to avoid multiple requests
    with open(filename, 'w') as outfile:
        print("Writing r.text to {}".format(filename))
        print(r.text, file=outfile)
def send_msg(msg):
    send_msg_url = 'https://www.cnblogs.com/mvc/PostComment/Add.aspx'
    form_data = {
        'blogApp': 'listenfwind',
        'postId': '11079061',
        'body': msg,
        'parentCommentId': 0
    }
    cookiejar = browsercookie.chrome()
    headers = {
        'user-agent': "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36"
    }
    return requests.post(send_msg_url, data=form_data, cookies=cookiejar, headers=headers)
def fetch_cookie():
    """
    Fetch cookie from `cookie.cache`
    """
    global headers
    if os.path.exists('./cookie.cache'):
        with open('./cookie.cache', 'r') as cookie:
            headers['Cookie'] = cookie.read()
    else:
        # guard against the session cookie not being present in Chrome
        xzl_session = None
        chrome_cookie = browsercookie.chrome()
        for cookie in chrome_cookie:
            if cookie.name == '_xiaozhuanlan_session':
                xzl_session = cookie.name + '=' + cookie.value
        if not xzl_session:
            print('\n\n\n\nPlease log in to 小专栏 (xiaozhuanlan.com) in Chrome first\n\n\n\n')
        else:
            with open('./cookie.cache', 'w') as f:
                f.write(xzl_session)
            headers['Cookie'] = xzl_session
def start_parse_job(self, response):
    url_jobs = response.css(
        '.sidebar .mainNavs .menu_box .menu_sub dd a.curr')

    cookie_k = []
    cookie_v = []
    for cookie in browsercookie.chrome():
        if 'www.lagou.com'.rfind(str(cookie.domain)) != -1:
            # print("cookie:" + str(cookie.domain))
            # print("cookie:" + str(cookie.name))
            cookie_k.append(cookie.name)
            cookie_v.append(cookie.value)
    self.cookies = dict(zip(cookie_k, cookie_v))

    headers = {
        "User-Agent": 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
    }
    splash_args = {
        'wait': 5,
        "http_method": "GET",
        # "images": 0,
        "render_all": 1,
        "headers": headers,
        'lua_source': lua_script,
        "cookies": self.cookies,
    }
    self.splash_args = splash_args

    for url_job in url_jobs:
        classify_href = url_job.xpath('@href').extract_first()
        classify_name = url_job.xpath('text()').extract_first()
        url = classify_href + "1/?filterOption=2"
        yield SplashRequest(url=url,
                            endpoint='execute',
                            meta={
                                'classify_name': classify_name,
                                'classify_href': classify_href
                            },
                            callback=self.parse_total_page,
                            dont_filter=True,
                            args=splash_args,
                            cache_args=['lua_source'])
def get_instance_url_and_session():
    cookies = browsercookie.chrome()
    sids = [
        c for c in cookies
        if c.name == 'sid' and re.search(r'salesforce.com', c.domain)
    ]
    if len(sids) == 0:
        raise Exception(
            "Couldn't find any `sid` cookie for a `salesforce.com` domain; please log in to Salesforce with Chrome, and wait up to 30 seconds."
        )
    if len(sids) > 1:
        raise Exception("Found multiple `sid` cookies:" +
                        "".join("\n{} {}".format(c.domain, c.value) for c in sids))
    instance_url = 'https://' + sids[0].domain
    session_id = sids[0].value
    _logger.info("Found instance_url ({}) and session_id".format(
        instance_url, session_id))
    return (instance_url, session_id)
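# The (instance_url, session_id) pair returned above is typically handed to a
# Salesforce API client. A minimal sketch assuming the caller uses the
# simple_salesforce package (an assumption; the snippet above does not show it):
from simple_salesforce import Salesforce

instance_url, session_id = get_instance_url_and_session()
sf = Salesforce(instance_url=instance_url, session_id=session_id)
print(sf.query("SELECT Id, Name FROM Account LIMIT 5"))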
def threadget(self):
    while True:
        param = self.q_req.get()
        with self.lock:
            self.running += 1
        try:
            # ans = self.opener.open(param.get('url')).read()
            if param.get('browser') == 'chrome':
                cookies = browsercookie.chrome()
            else:
                cookies = browsercookie.firefox()
            if cookies is None:
                ans = requests.get(param.get('url'), headers=self.headers).content
            else:
                ans = requests.get(param.get('url'), cookies=cookies, headers=self.headers).content
        except Exception as e:
            self.q_ans.put((param.get('url'), param.get('current_page'), param.get('end_page'), e))
        else:
            self.q_ans.put((param.get('url'), param.get('current_page'), param.get('end_page'), ans))
        with self.lock:
            self.running -= 1
        # self.opener.close()
        self.q_req.task_done()
        time.sleep(0.1)  # don't spam
if re.match(r"https?://.+", args.file_or_url):
    response = session.post("http://www.gpsvisualizer.com/convert?output", data={
        "convert_format": "gpx",
        "remote_data": args.file_or_url,
    }).text
    m = re.search(r"/download/convert/([0-9\-]+)-data\.gpx", response)
    if m:
        gpx_file = StringIO.StringIO(session.get("http://www.gpsvisualizer.com" + m.group(0)).text)
    else:
        raise Exception("Unable to convert URL to GPX")
else:
    gpx_file = open(args.file_or_url, "r")

gpx = gpxpy.parse(gpx_file)

session.cookies = browsercookie.chrome()
for cookie in session.cookies:
    cookie.expires = None

if len(gpx.tracks) > 1:
    individual_routes = raw_input("Route has %d tracks, make them individual routes? (1/0) " % len(gpx.tracks)) == "1"
else:
    individual_routes = False

if individual_routes:
    routes = [("%s %02d" % (args.name, i + 1), sum([segment.points for segment in track.segments], []))
              for i, track in enumerate(gpx.tracks)]
else:
    routes = [(args.name, sum([sum([segment.points for segment in track.segments], []) for track in gpx.tracks], []))]

for name, route in routes:
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import requests
import browsercookie
import re
import sys
reload(sys).setdefaultencoding("utf-8")

cj = browsercookie.chrome()
dict115 = cj._cookies[u'.115.com'][u'/']
cid = dict115['CID'].value
oefl = dict115['OOFL'].value
seid = dict115['SEID'].value
uid = dict115['UID'].value
data = {'cookies': {'CID': cid, 'OEFL': oefl, 'SEID': seid, 'UID': uid}}

import json
with open('/Users/kim/.115.cookies', 'w') as f:
    json.dump(data, f, ensure_ascii=False)
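# The script above reaches into the CookieJar's private _cookies mapping. A
# minimal sketch of the same lookup via public iteration, assuming the same
# .115.com cookie names used above:
import json
import browsercookie

wanted = {'CID', 'OOFL', 'SEID', 'UID'}
values = {c.name: c.value
          for c in browsercookie.chrome()
          if c.domain.endswith('115.com') and c.name in wanted}

with open('/Users/kim/.115.cookies', 'w') as f:
    json.dump({'cookies': values}, f, ensure_ascii=False)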
def load_cookies(self):
    self.session.cookies = browsercookie.chrome()