def __init__(self, cookies=None, credentials=None):
    """
    Constructs and returns an :class:`AmazonMusic <AmazonMusic>`. This
    will use a cookie jar stored, by default, in the home directory.

    :param credentials: Two-element array of username/password or lambda
        that will return such.
    :param cookies: (optional) File path to be used for the cookie jar.
    """
    local_dir = os.path.dirname(os.path.realpath(__file__))

    def _cookie_path(extension):
        # Default jar lives in $HOME (or %LOCALAPPDATA% on Windows, or
        # this package's directory as a last resort) unless an explicit
        # `cookies` path was supplied.
        return cookies or '{}/.amazonmusic-cookies.{}'.format(
            os.environ.get('HOME',
                           os.environ.get('LOCALAPPDATA', local_dir)),
            extension)

    cookie_path = _cookie_path('dat')
    self.session = requests.Session()
    if os.path.isfile(cookie_path):
        # Existing jar in libwww-perl (LWP / Set-Cookie3) format.
        self.session.cookies = LWPCookieJar(cookie_path)
        self.session.cookies.load()
    else:
        # Otherwise fall back to a Mozilla-format jar, loaded only if the
        # file already exists (a fresh jar is created on first save()).
        cookie_path = _cookie_path('moz.dat')
        self.session.cookies = MozillaCookieJar(cookie_path)
        if os.path.isfile(cookie_path):
            self.session.cookies.load()

    # The target music URL is persisted inside a synthetic cookie so a
    # later run returns to the same regional Amazon host.
    target_cookie = next(
        (c for c in self.session.cookies if c.name == COOKIE_TARGET), None)
    if target_cookie is None:
        target_cookie = Cookie(1, COOKIE_TARGET, AMAZON_MUSIC, '0', False,
                               ':invalid', True, ':invalid', '', False, True,
                               2147483647, False,
                               'Used to store target music URL',
                               'https://github.com/Jaffa/amazon-music/', {})

    # -- Fetch the homepage, authenticating if necessary...
    #
    self.__c = credentials
    r = self.session.get(target_cookie.value,
                         headers=self._http_headers(None))
    self.session.cookies.save()
    # The jar holds session secrets - restrict it to the owning user.
    os.chmod(cookie_path, 0o600)

    app_config = None
    while app_config is None:
        # Follow the sign-in flow while the response history shows a 302
        # redirect through the Amazon sign-in page.
        while r.history and any(h.status_code == 302
                                and AMAZON_SIGNIN in h.headers['Location']
                                for h in r.history):
            r = self._authenticate(r)

        # -- Parse out the JSON config object...
        #
        for line in r.iter_lines(decode_unicode=True):
            if 'amznMusic.appConfig = ' in line:
                # Strip everything before the first '{' and a trailing ';'
                # to leave pure JSON.
                app_config = json.loads(
                    re.sub(r'^[^{]*', '', re.sub(r';$', '', line)))
                break
        if app_config is None:
            raise Exception("Unable to find appConfig in {}".format(
                r.content))
        if app_config['isRecognizedCustomer'] == 0:
            # Not signed in yet: force the sign-in flow and re-parse on the
            # next loop iteration.
            r = self.session.get(AMAZON_MUSIC + AMAZON_FORCE_SIGNIN,
                                 headers=self._http_headers(r))
            app_config = None

    # Credentials are no longer needed once authenticated; drop them.
    # NOTE(review): placement after the loop reconstructed from the
    # collapsed source - inside the retry `if` would break re-auth; confirm.
    self.__c = None

    # -- Store session variables...
    #
    self.deviceId = app_config['deviceId']
    self.csrfToken = app_config['CSRFTokenConfig']['csrf_token']
    self.csrfTs = app_config['CSRFTokenConfig']['csrf_ts']
    self.csrfRnd = app_config['CSRFTokenConfig']['csrf_rnd']
    self.customerId = app_config['customerId']
    self.deviceType = app_config['deviceType']
    self.territory = app_config['musicTerritory']
    self.locale = app_config['i18n']['locale']
    # Fall back to the first two letters of the realm when unmapped.
    self.region = REGION_MAP.get(app_config['realm'],
                                 app_config['realm'][:2])
    self.url = 'https://' + app_config['serverInfo']['returnUrlServer']
    # Remember the resolved regional URL for the next run.
    target_cookie.value = self.url
    self.session.cookies.set_cookie(target_cookie)
    self.session.cookies.save()
def __init__(self, server_url, cookie_file=None, cookie_umask=None):
    """Create a client bound to *server_url*.

    A fresh LWP-format cookie jar is wired into a urllib opener; when a
    *cookie_file* is given, cookie persistence is delegated to
    ``_use_cookie_file`` (optionally applying *cookie_umask*).
    """
    self.server_url = server_url
    jar = LWPCookieJar()
    self.cookie_jar = jar
    self.session = build_opener(HTTPCookieProcessor(jar))
    if cookie_file:
        self._use_cookie_file(cookie_file, cookie_umask)
if __name__ == "__main__":
    # Py2/Py3 compatibility: bind LWPCookieJar from whichever module exists.
    # (The original only did `import cookielib` on Python 2, so the later
    # LWPCookieJar(...) call would have raised NameError there.)
    try:
        from cookielib import LWPCookieJar  # Python 2
        print("use cookielib in python2.")
    except ImportError:
        from http.cookiejar import LWPCookieJar  # Python 3
        print("use cookielib in python3.")

    # One session for the whole crawl.
    mafengwoSession = requests.session()
    # requests' default cookie container has no save() method; swap in an
    # LWPCookieJar so cookies can be persisted to disk between runs.
    mafengwoSession.cookies = LWPCookieJar(filename='mafengwoCookies.txt')
    header = {
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
        'referer': 'https://passport.mafengwo.cn/',
    }

    # Reuse saved cookies when the file exists; otherwise log in first,
    # which is expected to create the cookie file.
    try:
        mafengwoSession.cookies.load('mafengwoCookies.txt')
    except (IOError, OSError):
        mafengwo_login()
        mafengwoSession.cookies.load('mafengwoCookies.txt')

    isLogin = loginState()
    if not isLogin:
        print('登陆失败,重新输入用户名和密码')
        mafengwo_login()
# Create the per-user data directory.
# NOTE(review): os.makedirs raises OSError if USER_DATA_DIR already exists -
# presumably guarded by the caller; confirm.
os.makedirs(USER_DATA_DIR)
COOKIE_FILE = os.path.join(USER_DATA_DIR, "lwp_cookies.dat")
CACHE_FILE = os.path.join(USER_DATA_DIR, "cache")

# Lifetime of cached HTTP responses for the session below.
expire_after = timedelta(hours=2)
user_agent = "Mozilla/5.0 (Linux; Android 5.1.1; AFTT Build/LVY48F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.132 Mobile Safari/537.36"

# Service endpoints are stored hex-encoded (lightweight obfuscation);
# a2b_hex decodes them back into https:// URLs at import time.
auth_url = a2b_hex("68747470733a2f2f6170692e6d6f6264726f2e73782f7574696c732f61757468").decode("utf-8")
lb_url = a2b_hex("68747470733a2f2f6170692e6d6f6264726f2e73782f7574696c732f6c6f616462616c616e636572").decode("utf-8")
list_url = a2b_hex("68747470733a2f2f6170692e6d6f6264726f2e73782f73747265616d626f742f76342f73686f77").decode("utf-8")
# Client signature sent to the auth endpoint; alternative values kept for reference.
app_signature = str(0xceed20e0)  # 0xa2317cf8 0xeaa75001 0x22d78bd5 0x1e2f98cc

# Cached session: POSTs are cacheable too, the volatile "token" parameter is
# excluded from cache keys, and stale data is served if a request errors.
s = requests_cache.CachedSession(CACHE_FILE, allowable_methods="POST",
                                 expire_after=expire_after,
                                 old_data_on_error=True,
                                 ignored_parameters=["token"])
# Fail fast: raise on any non-2xx response.
s.hooks = {"response": lambda r, *args, **kwargs: r.raise_for_status()}
s.headers.update({"User-Agent": user_agent})
# Persist cookies across runs in LWP (Set-Cookie3) format.
s.cookies = LWPCookieJar(filename=COOKIE_FILE)
if os.path.isfile(COOKIE_FILE):
    # Keep session-only and expired cookies too.
    s.cookies.load(ignore_discard=True, ignore_expires=True)

# Refresh the auth token when it is older than 2 hours (7200 s),
# bypassing the response cache for the token request itself.
auth_token_time = int(addon.getSetting("auth_token_time") or "0")
auth_token = addon.getSetting("auth_token")
current_time = int(time.time())
if current_time - auth_token_time > 7200:
    with s.cache_disabled():
        r = s.post(auth_url, data={"signature": app_signature}, timeout=10)
    if r.content.strip():
        auth_token = r.json().get("token")
        addon.setSetting("auth_token_time", str(current_time))
        addon.setSetting("auth_token", auth_token)
# # ignore_expires=True 即便目标cookie已将在文件中存在 仍然对其写入 # # ignore_discard=True 即便cookie将要/已经过期 仍然将其写入 # cookie_obj.save(ignore_expires=True,ignore_discard=True) # # # 使用本地cookie进行请求 # cookie = LWPCookieJar() # cookie.load('neihan.txt') # request = Request('http://neihanshequ.com') # cookie_handler = HTTPCookieProcessor(cookie) # opener = build_opener(cookie_handler) # response = opener.open(request) # print(response.read()) # 模拟登陆美食街------------------------------------------------------------------- cookie =LWPCookieJar(filename='meishi.txt') cookie_handler = HTTPCookieProcessor(cookie) opener = build_opener(cookie_handler) headers = { 'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36' } post_url = 'https://i.meishi.cc/login.php?redirect=https%3A%2F%2Fi.meishi.cc%2Flogin.php%3Fac%3Dzhuce' # urlencode( 对url当中的参数进行编码) # quote 对url当中的中文进行编码 # urlencode()编码的对象为字典类型 # quote编码的对象为字符串 post_data = urlencode({ 'username':'',#自己注册去 'password':'' }) # 请求url 并传参 设置编码方式
# Toggle verbose mechanize/HTTP logging for troubleshooting.
debug = False
if debug:
    logger = logging.getLogger("mechanize")
    logger.addHandler(logging.StreamHandler(sys.stdout))
    logger.setLevel(logging.DEBUG)

br = mechanize.Browser()
if debug:
    br.set_debug_http(True)
    br.set_debug_responses(True)
    br.set_debug_redirects(True)

# Persist session cookies in LWP format so re-runs can skip the login.
cj = LWPCookieJar('cookie.txt')
br.set_cookiejar(cj)

user_manager_url = 'http://gateway/system_usermanager.php'
response = br.open(user_manager_url)

# Check if it gives us back the login page. if so, authenticate.
html = response.read()
if 'login.css' in html.decode('utf-8'):
    br.open('http://gateway/index.php')
    br.select_form(class_='login')
    br.form['usernamefld'] = 'admin'
    # Prompt interactively for the password (the original source was
    # credential-scrubbed at this point; reconstructed as a getpass call).
    passw = getpass.getpass(prompt='pfsense admin password:')
    br.form['passwordfld'] = passw
def init_basicauth(config, config_mtime):
    """initialize urllib2 with the credentials for Basic Authentication"""

    def filterhdrs(meth, ishdr, *hdrs):
        # this is so ugly but httplib doesn't use
        # a logger object or such
        # Wraps an httplib method so that its debug output (which goes to
        # stdout) is captured, the named sensitive headers are stripped,
        # and the redacted text is re-emitted on stdout.
        def new_method(self, *args, **kwargs):
            # check if this is a recursive call (note: we do not
            # have to care about thread safety)
            is_rec_call = getattr(self, '_orig_stdout', None) is not None
            try:
                if not is_rec_call:
                    self._orig_stdout = sys.stdout
                    sys.stdout = StringIO()
                meth(self, *args, **kwargs)
                hdr = sys.stdout.getvalue()
            finally:
                # restore original stdout
                if not is_rec_call:
                    sys.stdout = self._orig_stdout
                    del self._orig_stdout
            for i in hdrs:
                if ishdr:
                    # NOTE(review): doubled backslashes inside a raw string
                    # match literal '\' + 'r' sequences, not CR/LF - looks
                    # like double escaping; confirm against upstream.
                    hdr = re.sub(r'%s:[^\\r]*\\r\\n' % i, '', hdr)
                else:
                    hdr = re.sub(i, '', hdr)
            sys.stdout.write(hdr)
        new_method.__name__ = meth.__name__
        return new_method

    # In plain http_debug mode, redact credentials/cookies from the dump;
    # http_full_debug leaves everything visible.
    if config['http_debug'] and not config['http_full_debug']:
        HTTPConnection.send = filterhdrs(HTTPConnection.send, True,
                                         'Cookie', 'Authorization')
        HTTPResponse.begin = filterhdrs(HTTPResponse.begin, False,
                                        'header: Set-Cookie.*\n')

    if sys.version_info < (2, 6):
        # HTTPS proxy is not supported in old urllib2. It only leads to an error
        # or, at best, a warning.
        if 'https_proxy' in os.environ:
            del os.environ['https_proxy']
        if 'HTTPS_PROXY' in os.environ:
            del os.environ['HTTPS_PROXY']

    if config['http_debug']:
        # brute force: patch the handler class so every instance debugs.
        def urllib2_debug_init(self, debuglevel=0):
            self._debuglevel = 1
        AbstractHTTPHandler.__init__ = urllib2_debug_init

    cookie_file = os.path.expanduser(config['cookiejar'])
    global cookiejar
    cookiejar = LWPCookieJar(cookie_file)
    try:
        cookiejar.load(ignore_discard=True)
        # If the config changed after the cookies were written, the cached
        # session may be stale - drop it.
        if int(round(config_mtime)) > int(os.stat(cookie_file).st_mtime):
            cookiejar.clear()
            cookiejar.save()
    except IOError:
        try:
            # Create an empty, owner-only cookie file.
            fd = os.open(cookie_file,
                         os.O_CREAT | os.O_WRONLY | os.O_TRUNC, 0o600)
            os.close(fd)
        except IOError:
            # hmm is any good reason why we should catch the IOError?
            #print 'Unable to create cookiejar file: \'%s\'. Using RAM-based cookies.' % cookie_file
            cookiejar = CookieJar()
def main():
    """Log in to WeChat web, enumerate group chats and append each group's
    name and member count to weixin.csv / log.txt."""
    # HTTPS preparation: install a cookie-aware opener globally.
    # NOTE(review): the first opener (with LWPCookieJar) is immediately
    # overwritten by the second (with CookieJar) - dead assignment kept as-is.
    cookie_support = urllib.request.HTTPCookieProcessor(LWPCookieJar())
    opener = urllib.request.build_opener(cookie_support,
                                         urllib.request.HTTPHandler)
    opener = urllib.request.build_opener(
        urllib.request.HTTPCookieProcessor(CookieJar()))
    opener.addheaders = [(
        'User-agent',
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'
    )]
    urllib.request.install_opener(opener)

    # Mimic the WeChat web login: fetch a UUID and show its QR code.
    print('正在获取二维码图片...')
    uuid = getUUID()
    showQRImage(uuid)

    # Poll until the user scans the QR code (status '200' = confirmed).
    while waitForLogin(uuid) != '200':
        pass

    # Logged in - remove the QR code image file.
    os.remove(os.path.join(os.getcwd(), 'qrcode.jpg'))

    # Error handling for the login/init steps.
    if not login():
        print('登录失败')
        return
    if not webwxinit():
        print('初始化失败')
        return

    print('正在获取数据,请稍后...')
    MemberList = GetWechatContacts()
    MemberCount = len(MemberList)
    print('共有 ' + str(MemberCount) + '个群组')

    Usernames = []
    Usernames.append('')
    PeopleList = []
    PeopleListMem = []
    # Only groups in positions [startLocale, endLocale) are processed.
    startLocale = 1
    endLocale = 100
    nowLocale = 1
    # NOTE(review): the contact list fetched above is discarded and replaced
    # by the groups read from qunFile (CSV: UserName,NickName).
    MemberList = []
    if qunFile != "":
        sublist = {}
        for line in open(qunFile, 'r'):
            line = line.split(',')
            sublist['UserName'] = line[0]
            sublist['NickName'] = line[1]
            MemberList.append(sublist)

    for People in MemberList:  # for each group
        if nowLocale < startLocale:
            nowLocale += 1
            continue
        if nowLocale >= endLocale:
            break
        Usernames[0] = People['UserName']
        for person in batchInfo(1, Usernames)[0]['MemberList']:
            PeopleList.append(person['UserName'])
        print(People['NickName'], " ",
              len(batchInfo(1, Usernames)[0]['MemberList']))
        # Py2 `file()` builtin with a Py3 `open()` fallback.
        try:
            fcsv = file('weixin.csv', 'a')
        except:
            fcsv = open('weixin.csv', 'a')
        try:
            fcsv.write(People['NickName'])
            fcsv.write(',')
            fcsv.write(str(len(batchInfo(1, Usernames)[0]['MemberList'])))
            fcsv.write('\n')
            fcsv.close()
        except:
            pass
        time.sleep(1)  # calling too often easily triggers a 30-minute ban
        try:
            PeopleListMem.append(
                (People['NickName'],
                 len(batchInfo(1, Usernames)[0]['MemberList'])))
        # errors are possible here; to be investigated
        except:
            pass
        nowLocale += 1

    # Sort by group name, then write the de-duplicated list to log.txt.
    PeopleListMem = sorted(PeopleListMem, key=lambda People: People[0])
    try:
        foutput = file('log.txt', 'a')
    except:
        foutput = open('log.txt', 'a')
    last = ""
    count = 0
    for People in PeopleListMem:  # for each group
        # Skip consecutive duplicates (list is sorted by name).
        if (last == People[0]):
            continue
        foutput.write(People[0])
        foutput.write(",")
        foutput.write(str(People[1]))
        foutput.write("\n")
        last = People[0]
        count = count + 1
    foutput.close()
    print('通讯录共%s个群聊' % count)
    print("运行完毕, 详见log...")
    # Keep the console window open until the user presses Enter (Py2 input).
    useless = raw_input('')
import re
from getpass import getpass

# Mapping between SQL and numpy types
# (bytes keys - the server apparently reports column types as bytes).
numpy_dtype = {
    b"real": np.float32,
    b"float": np.float64,
    b"int": np.int32,
    b"bigint": np.int64,
    b"char": np.dtype("|S256"),
    b"nvarchar": np.dtype("|S256")
}

# Cookie storage - want to avoid creating a new session for every query.
# Missing cookie file on first run is expected and silently ignored.
cookie_file = "sql_cookies.txt"
cookie_jar = LWPCookieJar(cookie_file)
try:
    cookie_jar.load(ignore_discard=True)
except IOError:
    pass


class _WebDBConnection:
    def __init__(self, username, password=None):
        """Class to store info required to connect to the web server"""
        # Get password if necessary (interactive prompt when not supplied)
        if password is None:
            password = getpass()
        # Get URL for the database
        self.db_url = "http://galaxy-catalogue.dur.ac.uk:8080/Eagle"
        # Set up authentication and cookies
def __init__(self, info=None, request_charset='utf-8', response_charset=None):
    """Build an HTTP client with retrying, cookie storage, DNS overrides
    and (Elementum or custom) proxy support driven by add-on settings.

    :param info: optional dict of provider info; may carry
        'internal_proxy_url' / 'proxy_url' overrides.
    :param request_charset: charset used to encode outgoing payloads.
    :param response_charset: charset used to decode responses (None =
        autodetect).
    """
    self._counter = 0
    self._cookies_filename = ''
    self._cookies = LWPCookieJar()
    self.url = None
    self.user_agent = USER_AGENT
    self.content = None
    self.status = None
    self.username = None
    self.token = None
    self.passkey = None
    self.info = info
    self.proxy_url = None
    self.request_charset = request_charset
    self.response_charset = response_charset
    self.needs_proxylock = False
    self.headers = dict()
    self.request_headers = None

    self.session = requests.session()
    # TLS certificate verification disabled for scraped trackers.
    self.session.verify = False

    # Enabling retrying on failed requests
    retries = Retry(
        total=3,
        read=2,
        connect=2,
        redirect=3,
        backoff_factor=0.5,
        status_forcelist=[429, 500, 502, 503, 504])
    self.session.mount('http://', HTTPAdapter(max_retries=retries))
    self.session.mount('https://', HTTPAdapter(max_retries=retries))
    # self.session = cfscrape.create_scraper()
    # self.scraper = cfscrape.create_scraper()
    # self.session = self.scraper.session()

    # Module-level DNS server lists populated from settings.
    global dns_public_list
    global dns_opennic_list
    dns_public_list = get_setting("public_dns_list",
                                  unicode).replace(" ", "").split(",")
    dns_opennic_list = get_setting("opennic_dns_list",
                                   unicode).replace(" ", "").split(",")
    # socket.setdefaulttimeout(60)

    # Parsing proxy information
    proxy = {
        'enabled': get_setting("proxy_enabled", bool),
        'use_type': get_setting("proxy_use_type", int),
        'type': proxy_types[0],
        'host': get_setting("proxy_host", unicode),
        'port': get_setting("proxy_port", int),
        'login': get_setting("proxy_login", unicode),
        'password': get_setting("proxy_password", unicode),
    }
    # Invalid/out-of-range proxy_type setting falls back to proxy_types[0].
    try:
        proxy['type'] = proxy_types[get_setting("proxy_type", int)]
    except:
        pass

    if get_setting("use_public_dns", bool):
        # Route socket connections through the patched resolver.
        connection.create_connection = patched_create_connection

    if get_setting("use_elementum_proxy", bool):
        elementum_addon = xbmcaddon.Addon(id='plugin.video.elementum')
        if elementum_addon and elementum_addon.getSetting(
                'internal_proxy_enabled') == "true":
            # Elementum's built-in proxy listens locally on port 65222.
            self.proxy_url = "{0}://{1}:{2}".format(
                "http", "127.0.0.1", "65222")
            if info and "internal_proxy_url" in info:
                self.proxy_url = info["internal_proxy_url"]
            self.session.proxies = {
                'http': self.proxy_url,
                'https': self.proxy_url,
            }
    elif proxy['enabled']:
        if proxy['use_type'] == 0 and info and "proxy_url" in info:
            log.debug("Setting proxy from Elementum: %s" %
                      (info["proxy_url"]))
        elif proxy['use_type'] == 1:
            # Custom proxy from settings, with optional basic auth.
            log.debug("Setting proxy with custom settings: %s" %
                      (repr(proxy)))
            if proxy['login'] or proxy['password']:
                self.proxy_url = "{0}://{1}:{2}@{3}:{4}".format(
                    proxy['type'], proxy['login'], proxy['password'],
                    proxy['host'], proxy['port'])
            else:
                self.proxy_url = "{0}://{1}:{2}".format(
                    proxy['type'], proxy['host'], proxy['port'])
        if self.proxy_url:
            self.session.proxies = {
                'http': self.proxy_url,
                'https': self.proxy_url,
            }
# URL showing results for a query (page number and language/variety
# parameters are filled in via str.format).
BASE_URL = 'https://sms.linguistik.uzh.ch/sms-navigator/cgi-bin/solve.pl?selected=simple&queryType=simple&pageNumber={{page}}&freqMode=all&view=list&urlTest=yes&query=&dummy=SMS&tagger=NO+TAGS&corpus=sms_extended_all&case=No&pageSize=200&mainLang={mainLang}&main_de_varieties={de_variety}&main_fr_varieties=fra-all&main_it_varieties=ita-all&main_rm_varieties=roh-all&nonce_de_varieties=deu-all'

# Per-language query parameters for BASE_URL.
lang_parameters = {
    'de': dict(code="deu", mainLang="deu", de_variety="deu"),
    'fr': dict(code="fra", mainLang="fra", de_variety="deu-all"),
    'en': dict(code="eng", mainLang="eng", de_variety="deu-all"),
    'it': dict(code="ita", mainLang="ita", de_variety="deu-all"),
    'sg': dict(code="gsw", mainLang="deu", de_variety="gsw"),
    'any': dict(code="", mainLang="", de_variety="deu-all"),
}

cookies_file = 'sms.cookies'
session = requests.Session()
session.cookies = LWPCookieJar(cookies_file)
credentials = ('', '')


def strip_punctuation(s):
    """ Remove all punctuation from a sentence (unused) """
    punctuation = set(string.punctuation)
    kept = [ch for ch in s if ch not in punctuation]
    return ''.join(kept)


def get_html(url, page):
    """ Get the BeautifulSoup object for the given URL, doing authentification if needed """
    basic_auth = requests.auth.HTTPBasicAuth(*credentials)
    resp = session.get(url.format(page=page), auth=basic_auth)
    return BeautifulSoup(resp.text, 'html.parser')
def __init__(self):
    """Prepare everything needed for a simulated Taobao login: request
    headers, pre-computed encrypted credentials, the login POST payload
    and a cookie-aware urllib opener routed through a proxy."""
    # Login URL
    self.loginURL = "https://login.taobao.com/member/login.jhtml"
    # Proxy IP address, to keep our own IP from being banned
    # self.proxyURL = 'http://120.193.146.97:843'
    self.proxyURL = 'http://' + MyRequests._get_proxies()['http']
    # Headers sent when POSTing the login data
    self.loginHeaders = {
        'Host': 'login.taobao.com',
        'User-Agent': get_random_pc_ua(),
        'Referer': 'https://login.taobao.com/member/login.jhtml',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Connection': 'Keep-Alive'
    }
    # Headers mimicking the mobile (h5) login page.
    # NOTE(review): the empty-string key looks like a mangled ':path'
    # pseudo-header - confirm against the original capture.
    self.headers = {
        '': 'path: /login.htm?_input_charset=utf-8&ttid=h5%40iframe',
        'content-length': '3015',
        'cache-control': 'max-age=0',
        'origin': 'https://login.m.taobao.com',
        'upgrade-insecure-requests': '1',
        'content-type': 'application/x-www-form-urlencoded',
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'referer': 'https://login.m.taobao.com/login.htm?from=sm&ttid=h5@iframe&tpl_redirect_url=https%3A%2F%2Fsec.taobao.com%2Fquery.htm%3Faction%3DQueryAction%26event_submit_do_login%3Dok%26smApp%3Dmalldetailskip%26smPolicy%3Dmalldetailskip-h5-anti_Spider-h5SS-checklogin%26smCharset%3DGBK%26smTag%3DMTgzLjE1OS4xNzcuMTMwLCxiY2Y0NWZkZjVlYmI0ZGE2OTlkZjNkZmUyYTA1ODc4Mg%253D%253D%26captcha%3Dhttps%253A%252F%252Fsec.taobao.com%252Fquery.htm%26smReturn%3Dhttps%253A%252F%252Fdetail.m.tmall.com%252Fitem.htm%253Fid%253D20739535568%26smSign%3DEh51d83i2uzo2b2zGtgKRg%253D%253D',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'zh-CN,zh;q=0.9',
        'cookie': 'isg=BAEBfhDFP4K9JlO2YSgx_sw0EEvbhnVHYzo4S2NWsohnSiMcoH_n8l8IKL4Mug1Y',
    }
    # Username (left blank here)
    self.username = ''
    # "ua" token computed by Taobao's UA algorithm: it encodes a timestamp,
    # browser, screen resolution, random numbers, mouse movement/click
    # records, keystroke records and more.
    self.ua = '107#ssznSzS9s2xAImllPVytXzO1XnIggOmR9LcTi88ngXX9lXFu//94sFxKXrrOg/ZvsLFLTAtXgFXxxXGQ/CxRlaEJXq6nuVE9luz8TdFDyJB+BaUtsYc0P894xppFjjidAOaEIdSveZpfnD1iDG2n7ISDflmisXa8KMKMoU/xPqBLTYtnO8KQVMpYmoBPOShgsCbDNSCXXQGLbLURQXXlDI5M9IjfOd3dPUh9PEUyljo+bL1NmKqXt3/weVQb8v9QkE/6k9DzC5vWQJAec6+1aOnflejd87g7m9jYygKX3ccb1dbwQmh6qYOcuWlw3mQmKlYV2yr7t7xqEmG01vFkqYVr3/Lpu6tg29V22jXmoxcufdUbepynwC/HwUCiCgOKKZCNjE3OCy4bem2r8dEWdr9d3rhGvmt3jpiUK7XJCy4bek5r8dEmdrtGC3iUbfub25COijn8oEkDvpRIdDy/bqy48C+DCf3AiLpng9gn82splff8c6rjantrG/Cf81V6y9FAyii/Xf53Z5Zr0xMzheiteOnGC5FsmE/46EtzI5IZQkAdv7VYrrexvYMzGvQygi+OmTjae5dbF6w0EJjyE8T4lInDXvIvOoVYPyZ7XP1MbY38+FXEEITAla=='
    # Password, encrypted by Taobao (256-hex-digit ciphertext)
    self.password2 = '02d12254b4a503974749e4ee16f72e76081dce05e8fe449b487573ba72f14d47f9df56a8377afd48194afa7053aa1829cd9d3a55476a5510128244fea80fbed8a0c798146912122dcce4059be5ba85b39cc7d51fae6629a103d527256a3a48327c6e4cb1350806fa15e9ea07696cdce9c91658718f72b2f325b0d0784730e9fe'
    # Full login form payload.
    self.post = {
        'ua': self.ua,
        'TPL_checkcode': '',
        'CtrlVersion': '1,0,0,7',
        'TPL_password': '',
        'TPL_redirect_url': 'http://i.taobao.com/my_taobao.htm?nekot=udm8087E1424147022443',
        'TPL_username': self.username,
        'loginsite': '0',
        # NOTE(review): value was scrubbed to '******' in the original source.
        'newlogin': '******',
        'from': 'tb',
        'fc': 'default',
        'style': 'default',
        'css_style': '',
        'tid': 'XOR_1_000000000000000000000000000000_625C4720470A0A050976770A',
        'support': '000001',
        'loginType': '4',
        'minititle': '',
        'minipara': '',
        'umto': 'NaN',
        'pstrong': '3',
        'llnick': '',
        'sign': '',
        'need_sign': '',
        'isIgnore': '',
        'full_redirect': '',
        'popid': '',
        'callback': '',
        'guf': '',
        'not_duplite_str': '',
        'need_user_id': '',
        'poy': '',
        'gvfdcname': '10',
        'gvfdcre': '',
        # NOTE(review): key has a trailing space in the original form data.
        'from_encoding ': '',
        'sub': '',
        'TPL_password_2': self.password2,
        'loginASR': '1',
        'loginASRSuc': '1',
        'allp': '',
        'oslanguage': 'zh-CN',
        'sr': '1366*768',
        'osVer': 'windows|6.1',
        'naviVer': 'firefox|35'
    }
    # URL-encode the POST payload
    self.postData = urlencode(self.post)
    # Set up the proxy
    self.proxy = ProxyHandler({'http': self.proxyURL})
    # Set up the cookie jar
    self.cookie = LWPCookieJar()
    # Set up the cookie processor
    self.cookieHandler = HTTPCookieProcessor(self.cookie)
    # Opener used for login; its open() behaves like urllib2.urlopen
    self.opener = build_opener(self.cookieHandler, self.proxy, HTTPHandler)
import re ### Cookie实例 ### if True: import urllib from http.cookiejar import FileCookieJar from http.cookiejar import MozillaCookieJar from http.cookiejar import LWPCookieJar cookie_file = r"C:\Ruijie\workplace\python3\tmp\urllib_cookie.txt" # 构造cookie实例 # 创建FileCookieJar实例,检索cookie信息并将信息存储到文件中 #cookie = FileCookieJar(cookie_file) # 创建与Mozilla cookies.txt文件兼容的FileCookieJar实例 #cookie = MozillaCookieJar(cookie_file) # 创建与libwww-perl Set-Cookie3文件兼容的FileCookieJar实例 cookie = LWPCookieJar(cookie_file) cookie.load(cookie_file, ignore_discard=True, ignore_expires=True) # 创建cookie处理器 cookie_handle = urllib.request.HTTPCookieProcessor(cookie) # 构建opener opener = urllib.request.build_opener(cookie_handle) # 指定url url = "https://www.baidu.com/" # 构造请求头部 headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)', 'Host': 'baidu.com' } # 构造post表单数据 forms = {'word': 'hello'}
def clear_cookies(self):
    """Drop every stored cookie by installing a brand-new LWP jar."""
    fresh_jar = LWPCookieJar()
    self.cookie_jar = fresh_jar
    self.set_cookiejar(fresh_jar)
def panelsStar():
    """Scrape the star rating of every panel on paneljam.com for each
    programme id found in TabellaCompleta.xlsx, append them as a
    'panel_stars' column, write TabellaCompletaProva.xlsx and return the
    augmented DataFrame."""
    from http.cookiejar import LWPCookieJar
    import re
    import mechanize
    import pandas as pd
    from bs4 import BeautifulSoup as bs

    # Browser that keeps cookies, follows redirects/refreshes and ignores
    # robots.txt, presenting a desktop Chrome user agent.
    browser = mechanize.Browser()
    browser.set_cookiejar(LWPCookieJar())
    browser.set_handle_equiv(True)
    browser.set_handle_gzip(True)
    browser.set_handle_redirect(True)
    browser.set_handle_referer(True)
    browser.set_handle_robots(False)
    browser.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(),
                               max_time=1)
    browser.addheaders = [(
        'User-agent',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
    )]

    # Sign in.
    browser.open('https://www.paneljam.com/users/sign_in/')
    browser.select_form(nr=0)
    browser.form['user[email]'] = '*****@*****.**'
    browser.form['user[password]'] = 'extractor'
    browser.submit()

    # Programme ids, de-duplicated while keeping first-seen order.
    table = pd.read_excel('..\\data\\TabellaCompleta.xlsx', index=False)
    table = table.sort_values('id_panel', ascending=True)
    prog_ids = list(dict.fromkeys(table['id_prog'].tolist()))

    stars = []
    for done, prog in enumerate(prog_ids):
        print(str(done + 1) + '/' + str(len(prog_ids)))
        page = bs(
            browser.open('https://www.paneljam.com/jams/' + str(prog) +
                         '/panels').read(), 'html.parser')
        panels = page.find_all('div', class_='panel-wrap')
        # Only the first `depth` panels of this programme carry ratings.
        depth = max(table.loc[table['id_prog'] == prog]['panel_depth'])
        print(depth)
        for position, panel in enumerate(panels):
            if position == depth:
                break
            star_span = panel.find('span', class_='star')
            stars.append(int(re.search(r'\d+', star_span.text).group()))

    table['panel_stars'] = stars
    table.to_excel('..\\data\\TabellaCompletaProva.xlsx', index=False)
    return table
from http.cookiejar import LWPCookieJar
from urllib.request import Request, urlopen, HTTPCookieProcessor, build_opener
from urllib.parse import urlencode
import lxml

# Jar that can persist cookies to cookie.txt in LWP format.
cookie_obj = LWPCookieJar(filename='cookie.txt')
cookie_handler = HTTPCookieProcessor(cookie_obj)
opener = build_opener(cookie_handler)
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0'
}
# First-time login flow kept for reference: POST the credentials, then
# save the returned cookies to cookie.txt.
# url = 'https://i.meishi.cc/login.php?redirect=https%3A%2F%2Fi.meishi.cc%2Flogin.php%3Fac%3Dzhuce'
# data = urlencode({
#     'username':'******',
#     'password':'******'
# })
# request = Request(url,headers=headers)
# response = opener.open(request,bytes(data,encoding='utf-8'))
#
# cookie_obj.save(ignore_expires=True,ignore_discard=True)

# Log in using the locally saved cookie file instead.
cookie_obj = LWPCookieJar()
cookie_obj.load('cookie.txt')
cookie_handler = HTTPCookieProcessor(cookie_obj)
opener = build_opener(cookie_handler)
url = 'https://i.meishi.cc/login.php?redirect=https%3A%2F%2Fwww.meishij.net%2F'
def register(self, captcha_response, anyid):
    """Drive the full Yahoo Small Business "Business Maker" signup flow:
    fetch a CSRF token, fill the cart, create the account, then complete
    payment via PayPal and place the order.

    :param captcha_response: solved captcha token passed to the signup API.
    :param anyid: Yahoo tracking id reused as cookie and query parameter.
    :returns: False when signup keeps failing after REGISTER_TRY attempts;
        otherwise falls through after logging the created ids.
    """
    # subscriptions_id = "69da4e2-9ef8-4f8c-954a-3380fc47ecb0"
    # Random id shaped like the sample above (7-4-4-12 segments).
    subscriptions_id = self.randomString(7) + "-" + self.randomString(
        4) + "-" + self.randomString(4) + "-" + self.randomString(12)
    session = requests.Session()
    # Persist session cookies to the 'cookiejar' file (LWP format).
    session.cookies = LWPCookieJar('cookiejar')
    # Static cookies captured from a real browser session.
    cookies = {
        'YSB_ELEVATED_PRIVACY': 'false',
        'LV': '1.2&idm=1',
        'CONSENT': '11111.1593036582106',
        'anyid': anyid,
        '_pin_unauth': 'dWlkPU1URTVNRFJqTmprdE1UTmhOeTAwTUdRekxXRXhabVV0Tm1JM05qWTVOVFE0T1RWag',
        'A3': 'd=AQABBA6X814CEKH-o_p9ZcWoV0EGx-YaK3IFEgEBAQHo9F79XgAAAAAA_SMAAAcIDpfzXuYaK3I&S=AQAAAulj0DEWfi_9nk11JG9djT8',
        'B': '74aoqspff75oe&b=3&s=s8',
        '_mkto_trk': 'id:986-MCG-755&token:_mch-yahoo.com-1593022224105-89202',
        '_ce.s': 'v~UHeC_xAIxdFkEQ00Ryylceqtn1U~ir~1',
        '_uetsid': '03119019-576b-62eb-a30c-54192c719f25',
        '_uetvid': '35484e1c-c601-cc71-fc3c-97c7fc02da6a'
    }
    headers = {
        'Host': 'smallbusiness.yahoo.com',
        'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:77.0) Gecko/20100101 Firefox/77.0',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate',
        'Content-Type': 'application/json',
        'X-Requested-With': 'XMLHttpRequest',
        'Origin': 'https://smallbusiness.yahoo.com',
        'Referer': 'https://smallbusiness.yahoo.com/businessmaker/payitforward',
        'Upgrade-Insecure-Requests': '1',
    }
    # Variant used for the form-encoded cart PUT.
    headers_cart = {
        'Host': 'smallbusiness.yahoo.com',
        'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:77.0) Gecko/20100101 Firefox/77.0',
        'Accept': '*/*',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate',
        'X-Requested-With': 'XMLHttpRequest',
        'Origin': 'https://smallbusiness.yahoo.com',
        'Referer': 'https://smallbusiness.yahoo.com/businessmaker/payitforward',
    }

    # Csrf Generate
    response = session.get(
        'https://smallbusiness.yahoo.com/_xhr/_proxy/api/csrf/token',
        headers=headers, cookies=cookies)
    csrf = json.loads(response.text)["data"]["token"]

    data = {
        "_csrf": csrf,
        "businessCategory": "Business Billing & Collection",
        "businessCategoryId": "96932170",
        "businessName": "Billing Cagex23"
    }
    response = session.post(
        'https://smallbusiness.yahoo.com/_xhr/_proxy/api/marketing/business/recommendations',
        headers=headers, cookies=cookies, json=data)

    # put request to cart (form-encoded body with embedded JSON)
    data = '_csrf=' + csrf + '&subscriptions=[{"id":"' + subscriptions_id + '","baseProductId":"bmaker","meta":{},"addRatePlans":[{"ratePlanId":"bmaker_t12m","productId":"bmaker","type":"base"},{"ratePlanId":"acctfree_t1m","productId":"acctfree","type":"addon"},{"ratePlanId":"wsite_t12m","productId":"wsite","type":"addon"},{"ratePlanId":"lwfree_t1m","productId":"lwfree","type":"addon"}]}]&promo=PAYITFORWARD&domainBundles=[]'
    response = session.put(
        'https://smallbusiness.yahoo.com/_xhr/_proxy/api/ordering/cart',
        headers=headers_cart, cookies=cookies, data=data)
    session.cookies.save()

    headers2 = {
        'Host': 'platform-api.yahoosmallbusiness.com',
        'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:77.0) Gecko/20100101 Firefox/77.0',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate',
        'Content-Type': 'application/json',
        'Origin': 'https://checkout.yahoosmallbusiness.com',
        'Referer': 'https://checkout.yahoosmallbusiness.com/bmcheckout/login',
        'Upgrade-Insecure-Requests': '1',
    }
    # Signup payload assembled by raw string concatenation.
    # NOTE(review): internal whitespace of the triple-quoted JSON was lost in
    # the collapsed source; the layout below is a reconstruction (JSON is
    # whitespace-insensitive, so behavior is unaffected).
    json_data = '''{
        "_csrf": "''' + csrf + '''",
        "userInfo": {
            "firstName": "''' + self.FIRST_NAME + '''",
            "lastName": "''' + self.LAST_NAME + '''",
            "userid": "''' + self.EMAIL + '''",
            "passwd": "''' + self.PASSWORD + '''",
            "marketingConsent": "on",
            "dataConsent": "off",
            "agreetos": "on",
            "bibUser": true
        },
        "bizInfo": {
            "businessType": "BUSINESS",
            "name": "''' + self.BUSINESS_NAME + '''",
            "ownerName": "''' + self.FIRST_NAME + ''' ''' + self.LAST_NAME + '''",
            "role": "",
            "category": "''' + self.CATEGORY_NAME + '''",
            "categoryId": "''' + str(self.CATEGORY_ID) + '''",
            "subCategory": "''' + self.SUB_CATEGORY_NAME + '''",
            "subCategoryId": "''' + str(self.SUB_CATEGORY_ID) + '''",
            "businessProfile": {
                "workPlace": "",
                "emplyeeCount": "",
                "yearsOfOperation": "",
                "annualRevenue": "",
                "topConcern": "NONE",
                "howSell": "",
                "howReserve": "",
                "stage": "early",
                "presenceType": ""
            }
        },
        "addressInfo": {
            "address1": "",
            "address2": "",
            "addressType": "LOCATION",
            "presenceType": "PHYSICAL",
            "city": "",
            "state": "",
            "country": "",
            "zipcode": ""
        },
        "ysbcaptchatoken": "''' + captcha_response + '''"
    }'''
    response = session.post(
        'https://platform-api.yahoosmallbusiness.com/api/businesses/signup',
        headers=headers2, cookies=cookies, data=json_data)
    json_data = json.loads(response.text)
    # NOTE(review): ids are read before the success check, so a failed
    # signup whose response lacks "data" would raise KeyError here.
    id1 = json_data["data"]["bizInfo"]["id"]
    id2 = json_data["data"]["addressInfo"]["id"]
    if json_data['success'] == False:
        # Retry recursively up to REGISTER_TRY times.
        self.REGISTER_TRY -= 1
        if self.REGISTER_TRY <= 0:
            return False
        return self.register(captcha_response, anyid)

    headers = {
        'Host': 'checkout.yahoosmallbusiness.com',
        'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:77.0) Gecko/20100101 Firefox/77.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate',
        'Referer': 'https://smallbusiness.yahoo.com/businessmaker/payitforward',
        'Upgrade-Insecure-Requests': '1',
    }
    response = session.get(
        'https://checkout.yahoosmallbusiness.com/bmcheckout?_anyid=' + anyid
        + '&_ga=2.54325288.1450332677.1593022216-1431927048.1593022216',
        headers=headers, cookies=cookies)
    session.cookies.save()

    # Get Paypal Url
    response = session.get(
        'https://platform-api.yahoosmallbusiness.com/api/user/payment/paypalurl?isBib=true&_csrf=' + csrf,
        headers=headers2, cookies=cookies)
    paypal_url = json.loads(response.text)["data"]["paypalUrl"]
    session.cookies.save()
    # The PayPal token is the value of the URL's last '=' parameter.
    token = paypal_url.rsplit("=", 1)[1]
    # Open PayPal in Firefox and give the operator 60 s to approve payment.
    webbrowser.get('firefox').open(paypal_url)
    time.sleep(60)

    response = session.get(
        'https://checkout.yahoosmallbusiness.com/paypal-bib/redirect/path/success/confirmation/cancel/payment?token=' + token,
        headers=headers, cookies=cookies, allow_redirects=False)
    # Extract the payment id from the redirect body.
    pid = re.search("pid=(.*?)&", response.text).group(1)
    data = {"_csrf": csrf, "paymentToken": pid, "userEntity": {}}
    response = session.post(
        'https://platform-api.yahoosmallbusiness.com/api/ordering/cart/placeorder',
        headers=headers2, cookies=cookies, json=data)
    try:
        json_data = json.loads(response.text)
        if json_data["success"]:
            # Record the created account in the log and in ids2.csv.
            logger.info(self.EMAIL + "," + self.PASSWORD + "," +
                        self.FIRST_NAME + "," + self.LAST_NAME + "," +
                        self.BUSINESS_NAME + "," + id1 + "," + id2)
            with open('ids2.csv', 'a', newline='') as csvfile:
                spamwriter = csv.writer(csvfile, delimiter=',',
                                        quotechar='|',
                                        quoting=csv.QUOTE_MINIMAL)
                spamwriter.writerow([
                    self.EMAIL, self.PASSWORD, self.FIRST_NAME,
                    self.LAST_NAME, self.BUSINESS_NAME, id1, id2,
                    "registered"
                ])
        else:
            logger.error(json_data)
    except:
        # Non-JSON response: log the raw body for diagnosis.
        logger.info(response.text)
# URL templates: next results page, first page with an explicit result
# count, and next page with an explicit result count (all %-formatted
# with tld/lang/query/start/num/tbs/safe/tpe).
url_next_page = "https://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&" \
                "start=%(start)d&tbs=%(tbs)s&safe=%(safe)s&tbm=%(tpe)s"
url_search_num = "https://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&" \
                 "num=%(num)d&btnG=Google+Search&tbs=%(tbs)s&safe=%(safe)s&" \
                 "tbm=%(tpe)s"
url_next_page_num = "https://www.google.%(tld)s/search?hl=%(lang)s&" \
                    "q=%(query)s&num=%(num)d&start=%(start)d&tbs=%(tbs)s&" \
                    "safe=%(safe)s&tbm=%(tpe)s"

# Cookie jar. Stored at the user's home folder.
home_folder = os.getenv('HOME')
if not home_folder:
    home_folder = os.getenv('USERHOME')
    if not home_folder:
        home_folder = '.'  # Use the current folder on error.
cookie_jar = LWPCookieJar(os.path.join(home_folder, '.google-cookie'))
# A missing/corrupt cookie file is fine - start with an empty jar.
try:
    cookie_jar.load()
except Exception:
    pass

# Default user agent, unless instructed by the user to change it.
USER_AGENT = 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0)'

# Load the list of valid user agents from the install folder.
# (This try-block continues beyond the end of this chunk.)
try:
    install_folder = os.path.abspath(os.path.split(__file__)[0])
    try:
        user_agents_file = os.path.join(install_folder, 'user_agents.txt.gz')
        import gzip
        fp = gzip.open(user_agents_file, 'rb')
import requests
import time
from http.cookiejar import LWPCookieJar
import scrapy
from selenium import webdriver
import re
import json
import os

# Paths: this module's directory, its parent, and a sibling "data" folder.
current_path = os.path.dirname(__file__)
parent_path = os.path.abspath('..')
path = os.path.join(os.path.abspath(".."), "data")

# Module-wide session whose cookies persist to cookies.txt (LWP format).
session = requests.session()
session.cookies = LWPCookieJar(filename='cookies.txt')


class ZhiHuLoginSpider(scrapy.Spider):
    name = 'zhuhu_login'
    start_urls = ['www.zhihu.com']

    def __init__(self):
        self.agent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36"
        # Try to reuse a previously saved cookie; fall back to None so the
        # spider can log in from scratch.
        try:
            self.Cookie = self.load_cookie()
        except Exception as e:
            self.Cookie = None
        # (dict continues beyond the end of this chunk)
        self.header = {
            "Host": "www.zhihu.com",
            "Referer": "https://www.zhihu.com",
            "User-Agent": self.agent,