def get_text_with_link_on_weather_data_file(current_session: Session, ws_id: int,
                                            start_date: date, last_date: date, url: str):
    """
    Builds a POST query to the rp5 site with special params to get the JS text
    containing a link to the csv.gz file, and returns the response.
    A session with headers is required; otherwise the site returns 'Error #FS000;'.
    """
    phpsessid = None
    for x in current_session.cookies.items():
        if x[0] == 'PHPSESSID':
            phpsessid = x[1]
    if url == 'https://rp5.ru':
        current_session.headers = rp5_ru_headers.get_header(
            current_session.cookies.items()[0][1], choice(browsers))
    elif url == 'https://rp5.md':
        current_session.headers = rp5_md_headers.get_header(phpsessid, 'Chrome')
    else:
        current_session.headers = rp5_ru_headers.get_header(
            current_session.cookies.items()[0][1], choice(browsers))
    try:
        result: Response = current_session.post(
            f"{url}/responses/reFileSynop.php",
            data={'wmo_id': ws_id,
                  'a_date1': start_date.strftime('%d.%m.%Y'),
                  'a_date2': last_date.strftime('%d.%m.%Y'),
                  'f_ed3': 5,
                  'f_ed4': 5,
                  'f_ed5': 17,
                  'f_pe': 1,
                  'f_pe1': 2,
                  'lng_id': 2,
                  })
        return result
    except HTTPError as http_err:
        print(f'HTTP error occurred: {http_err}')
    except Exception as err:
        print(f'Other error occurred: {err}')
def __requestSetup(retries, sleep):
    session = Session()
    retry = Retry(total=retries, read=retries, backoff_factor=sleep)
    adapter = HTTPAdapter(max_retries=retry)
    session.headers = {'Content-type': 'application/json'}
    session.mount('http://', adapter)
    return session
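A usage sketch for the helper above (the health-check URL is hypothetical). Note that __requestSetup mounts the retry adapter for http:// only, so https:// requests would bypass the retry logic unless a second mount is added:

# Assumes: from requests.adapters import HTTPAdapter
#          from urllib3.util.retry import Retry
session = __requestSetup(retries=3, sleep=0.5)
# Optional: give https:// the same retry behavior as http://
session.mount('https://', HTTPAdapter(max_retries=Retry(total=3, backoff_factor=0.5)))
resp = session.get('http://example.com/health', timeout=5)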
def post_request(route, endpoint=API_ENDPOINT, **kwargs):
    url = build_url(route, endpoint)
    payload = build_json(**kwargs)
    # proxies = {'http': 'http://127.0.0.1:8888/'}
    headers = {
        'User-Agent': generate_ua(get_username(), get_password()),
        'Host': 'api.glitchednet.com',
        'Connection': 'Keep-Alive',
        'Accept-Encoding': 'gzip',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Content-Length': str(len(payload))
    }
    s = Session()
    s.headers = headers
    # s.proxies = proxies
    try:
        response = s.post(url, data=payload)
        if response.ok:
            return response
        else:
            return ""
    except Timeout:
        logger.error("Timeout...")
        return ""
    except Exception as e:
        logger.error("Unknown exception: {}".format(e))
        exit(0)  # aborts the process; the return below is unreachable
        return ""
def checkInfoFiles(self):
    ani24InfoFiles = [
        'ani24_genre.txt', 'ani24_producer.txt',
        'ani24_year.txt', 'ani24_quarter.txt'
    ]
    yaani24InfoFiles = [
        'yaani24_type.txt', 'yaani24_gerne.txt',
        'yaani24_year.txt', 'yaani24_quarter.txt',
        'yaani24_playway.txt', 'yaani24_clothes.txt',
        'yaani24_hairstyle.txt', 'yaani24_haircolor.txt'
    ]
    s = Session()
    s.headers = {
        'user-agent': 'Mozilla 5.0',
        'referer': 'https://ani24do.com'
    }
    r1 = s.get('https://ani24do.com/ani/search.php?type=all').text
    s1 = BeautifulSoup(r1, 'html.parser').find_all('select')
    a24P = [p.text for p in s1[0].find_all('option')[1:]]
    a24G = [g.text for g in s1[1].find_all('option')[1:]]
    r2 = s.get('https://ani24do.com/ani/top10.html?type=quarter').text
    s2 = BeautifulSoup(r2, 'html.parser').find_all('select')
    a24Q = [q.text for q in s2[0].find_all('option')]
    a24Y = [y.text for y in s2[1].find_all('option')]
    r3 = s.get('https://yaani24.net/ani/search.php?type=all').text
    s3 = BeautifulSoup(r3, 'html.parser').find_all('select')
    ya24T = [g.text for g in s3[0].find_all('option')]
    ya24G = [g.text for g in s3[1].find_all('option')]
    ya24hC = [hC.text for hC in s3[2].find_all('option')]
    ya24hS = [hS.text for hS in s3[3].find_all('option')]
    ya24C = [c.text for c in s3[4].find_all('option')]
    ya24p = [p.text for p in s3[5].find_all('option')]
    r4 = s.get('https://yaani24.net/ani/top10.html?type=quarter').text
    s4 = BeautifulSoup(r4, 'html.parser').find_all('select')
    ya24Q = [q.text for q in s4[0].find_all('option')]
    ya24Y = [g.text for g in s4[1].find_all('option')]
    ani24Info = [a24G, a24P, a24Y, a24Q]
    yaani24Info = [ya24T, ya24G, ya24Y, ya24Q, ya24p, ya24C, ya24hS, ya24hC]
    for idx, a in enumerate(ani24Info):
        self.makeInfoFiles(f'./info/{ani24InfoFiles[idx]}', a)
    for idx, y in enumerate(yaani24Info):
        self.makeInfoFiles(f'./info/{yaani24InfoFiles[idx]}', y)
    msg = QMessageBox()
    # Korean UI strings: "Ani24 Downloader" / "New information has been loaded."
    msg.about(self, "애니24 다운로더", "새 정보를 불러왔습니다.")
def session(self):
    session = Session()
    session.headers = {
        'Content-Type': 'application/json',
        'Authorization': 'Bearer {}'.format(self.token)
    }
    return session
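Many snippets in this collection assign a plain dict to session.headers, which replaces requests' default headers (User-Agent, Accept-Encoding, Connection: keep-alive) wholesale. When those defaults should survive, headers.update() is the safer idiom; a minimal sketch with a placeholder token:

from requests import Session

s = Session()
# update() merges into requests' default CaseInsensitiveDict instead of replacing it
s.headers.update({
    'Content-Type': 'application/json',
    'Authorization': 'Bearer <token>',  # placeholder, not a real token
})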
def request(self, method, url, data=None, headers=None):
    if headers is None:
        headers = {}  # guard: headers.get() below would fail on None
    if data and not isinstance(data, basestring):
        data = urlencode(data)
    if data is not None:
        data = data.encode()
    # If we have data and Content-Type is not set, set it...
    if data and not headers.get('Content-Type', None):
        headers['Content-Type'] = 'application/x-www-form-urlencoded'
    # If a connection for this scheme+location is not established, establish it.
    uri = urlparse(url)
    if not self.connections.get(uri.scheme + uri.netloc):
        session = Session()
        session.headers = self.headers.copy()
        self.connections[uri.scheme + uri.netloc] = session
    session = self.connections[uri.scheme + uri.netloc]
    response = session.request(method, url, data=data, headers=headers,
                               timeout=self.timeout)
    if response.status_code > 399:
        raise HTTPError(response)
    return response.text
def login_platform(session: requests.Session, domain: str) -> bool:
    """
    Log in to platform one/two.
    :param session:
    :param domain:
    :return: True on success, False on failure
    """
    if domain == domain1:
        url = login_url1
        user_name = user_name1
        user_password = user_password1
        headers = headers1
        check_login_url = check_login_url1
    else:
        url = login_url2
        user_name = user_name2
        user_password = user_password2
        headers = headers2
        check_login_url = check_login_url2
    session.headers = headers
    resp = session.get(url)
    # text = resp.text
    # print(text)
    args = {"account": user_name, "password": user_password}
    resp = session.post(check_login_url, data=args)
    code = resp.status_code
    if code != 200:
        ms = "Failed to log in to platform {}, error code {}".format(domain, code)
        logger.exception(ms)
        return False
    else:
        # mes = resp.text
        # print(mes)
        return True
def need_login_platform(session: requests.Session, domain: str) -> bool:
    """
    Does platform one/two require a login?
    :param session:
    :param domain:
    :return: True if login is required, False otherwise
    """
    if domain == domain1:
        page_url_base = page_url_base1
        headers = headers1
    else:
        headers = headers2
        page_url_base = page_url_base2
    session.headers = headers
    r = session.get(page_url_base)
    code = r.status_code
    if code != 200:
        ms = "Failed to check whether platform {} requires login, error code {}".format(domain, code)
        logger.exception(ms)
        return False
    else:
        t = r.content
        print(r.text)
        pq = PyQuery(t)
        u_name = pq.find("form .uname")
        # The placeholder reads "Please enter your email or MT account" on the login form.
        if u_name.attr("placeholder") == '请输入邮箱或者MT账号':
            return True
        else:
            return False
def create_client(self, web_service) -> Client:
    session = Session()
    session.headers = {}
    transport = Transport(session=session)
    # Don't remove this line: Saman Bank blocks requests if it is removed.
    transport.session.headers = {}
    return Client(web_service, transport=transport)
def parse_link(link):
    a = link.find('url=')
    b = randint(1, 100)
    c = link[a + b + 25]
    url = f'https://weixin.sogou.com{link}&k={b}&h={c}'
    session = Session()
    session.get('https://weixin.sogou.com/weixin')
    session.headers = CaseInsensitiveDict({
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/66.0.3359.181 Safari/537.36 ',
        'Accept-Encoding': 'gzip, deflate',
        'Accept': '*/*',
        'Connection': 'keep-alive',
        'Referer': 'https://weixin.sogou.com/weixin',
    })
    resp = session.get(url)
    resp.encoding = 'utf-8'
    url_fragments = findall(r'url \+= \'(.*?)\';', resp.text)
    # print(sogou_session.cookies)
    # time.sleep(3)
    # sogou_session.get('https://weixin.sogou.com/weixin')
    # sogou_session.headers.update({'Referer': 'https://weixin.sogou.com/weixin'})
    # print(sogou_session.headers)
    # url_fragments = findall(r'url \+= \'(.*?)\';', get_html(url))
    return ''.join(url_fragments).replace('@', '')
def send(self, method, url, params=None, token=None):
    try:
        session = Session()
        session.headers = self.headers
        if token is not None:
            session.headers.update({'X-USER-TOKEN': token})
        endpoint = '{base_url}/{url}'.format(
            base_url=self.API_ENDPOINT,
            url=url,
        )
        data = json.dumps(params) if params else None
        if method == 'get':
            res = session.get(endpoint, params=data)
        elif method == 'post':
            res = session.post(endpoint, data=data)
        elif method == 'put':
            res = session.put(endpoint, data=data)
        elif method == 'delete':
            res = session.delete(endpoint)
        session.close()
        return res
    except HTTPError as e:
        self.logger.error(e)
def open_platform(session: requests.Session, domain: str) -> bool:
    """
    Open the platform one/two site. If already logged in, return True; otherwise
    retry the login, giving up and returning False after three failed attempts.
    :param session:
    :param domain: base site URL
    :return: True if opened successfully, False otherwise (check the program)
    """
    if domain == domain1:
        headers = headers1
        login_url = login_url1
    else:
        headers = headers2
        login_url = login_url2
    session.headers = headers
    count = 0
    flag = not need_login_platform(session, domain)
    while (not flag) and count < 3:
        # login_platform expects the domain, not the login URL
        flag = login_platform(session, domain)
        if not flag:
            ms = "Failed to log in to {}".format(login_url)
            logger.exception(ms)
        count += 1
    return flag
def GetSession(referer):
    sess = Session()
    sess.headers = {
        'User-Agent': 'Mozilla 5.0',
        'referer': referer,
    }
    return sess
def parse_pages():
    headers = {
        'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.108 Safari/537.36',
        'accept': '*/*'
    }
    url = "https://hh.ru/search/vacancy"
    session = Session()
    session.headers = headers
    r = session.get(url)
    if r.status_code != 200:
        print('Error occurred while parsing! Check network connection.')
        return
    soup = bs(r.content, 'lxml')
    try:
        count = int(
            soup.find_all('a', attrs={'data-qa': 'pager-page'})[-1].text)
    except (IndexError, ValueError):  # no pager found, assume a single page
        count = 1
    pages = Queue()
    emails = Queue()
    for i in range(10):
        thread = VacanciesParser(url, pages, session, emails)
        thread.daemon = True
        thread.start()
    handler = EmailHandler(emails)
    handler.start()
    for i in range(count):
        pages.put(i)
    emails.join()
    pages.join()
def get_text_with_link_on_weather_data_file(current_session: Session, ws_id: int,
                                            start_date: date, last_date: date):
    """
    Builds a POST query to rp5.ru with special params to get the JS text
    containing a link to the csv.gz file, and returns the response.
    A session with headers is required; otherwise the site returns 'Error #FS000;'.
    """
    current_session.headers = rp5_headers.get_header(
        current_session.cookies.items()[0][1], 'Chrome')
    try:
        result: Response = current_session.post(
            URL,
            data={'wmo_id': ws_id,
                  'a_date1': start_date.strftime('%d.%m.%Y'),
                  'a_date2': last_date.strftime('%d.%m.%Y'),
                  'f_ed3': 3,
                  'f_ed4': 3,
                  'f_ed5': 27,
                  'f_pe': 1,
                  'f_pe1': 2,
                  'lng_id': 2,
                  })
        return result
    except HTTPError as http_err:
        print(f'HTTP error occurred: {http_err}')
    except Exception as err:
        print(f'Other error occurred: {err}')
def run(url, file_path):
    headers = {
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/60.0.3112.90 Safari/537.36',
        'connection': 'keep-alive',
    }
    proxies = {
        'http': 'socks5://127.0.0.1:1080',
        'https': 'socks5://127.0.0.1:1080',
    }
    session = Session()
    session.headers = headers
    session.proxies = proxies
    pool = ThreadPool(4)
    fun = partial(ytb_download, session, file_path)
    if 'playlist' in url:
        pool.map(fun, get_urls_in_playlist(session, url))
        # for video_url in get_urls_in_playlist(session, url):
        #     ytb_download(session, file_path, video_url)
    else:
        ytb_download(session, file_path, url)
    pool.close()
    pool.join()
def get_link_list(url):
    # Build a Session object
    s = Session()
    # Set request headers for the whole session
    s.headers = request_header.header
    try:
        # Request the page and get the response
        resp = s.get(url)
        # Set the response encoding
        resp.encoding = resp.apparent_encoding
        # Build the BeautifulSoup object
        bsobj = BeautifulSoup(resp.text, 'html.parser')
    except Exception:
        print('Could not connect: ' + str(sys.exc_info()[1]))
        raise  # re-raise instead of the original's deliberate 5 / 0 crash
    else:
        # Find all img tags
        imgs = bsobj.find_all('img')
        # List to hold the image links
        img_links = []
        # Save the original link of every image
        for img in imgs:
            if 'src' in img.attrs:
                img_links.append(img['src'])
        return img_links
def getSearchResults(self, value):
    # MAKE FIRST ACCESS TO VIDEO TO GET SESSION
    print("Searching for videos with keyword '%s'..." % value)
    url1 = 'https://www.xnxx.com/search/' + value
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; de-at) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1'
    }
    s = Session()
    s.headers = headers
    resp = s.request('GET', url1)
    print("First call HTTP status: [" + str(resp.status_code) + "] " + resp.reason)

    # GET VIDEO LIST
    videos = {}
    # video_list = soup1.findAll('div', attrs={'class': 'thumb-block'})
    c = 0
    # for video in video_list:
    #     videos[c] = {
    #         'preview': video.find('div', 'thumb_container').get('data-previewvideo'),
    #         'thumb': video.find('img', 'thumb').get('src'),
    #         'name': video.find('u').string.replace(",", ""),
    #         'id': video.find('img', 'hSprite').get('id'),
    #         'duration': video.find('b').string
    #     }
    #     c += 1
    return self.getVideosFromHtml(resp.text)

    # NOTE: everything below is unreachable legacy code from an earlier
    # xhamster-based version; soup1, query and xrange are undefined here.
    # GET PAGE COUNT
    pages = soup1.find('div', 'pager').findAll('a')
    max_page = 1
    limit_page = 1
    for page in pages:
        if page.string is not None:
            if page.string.isdigit():
                max_page = int(page.string)
    print('Total pages found: %d' % max_page)
    if max_page > limit_page:
        print('Number of pages truncated to %d' % limit_page)
        max_page = limit_page
    # GET MIDDLE PAGES RESULTS
    if max_page > 1:
        for x in xrange(2, max_page + 1):
            print("Getting results for page %d..." % x)
            urlx = 'https://xhamster.com/search?q=' + query + '&p=' + str(x)
            respx = s.request('GET', urlx)
            soupx = BeautifulSoup(respx.text, "html.parser")
            video_list = soupx.findAll('div', attrs={'class': 'video'})
            for video in video_list:
                videos[c] = {
                    'preview': video.find('div', 'thumb_container').get('data-previewvideo'),
                    'thumb': video.find('img', 'thumb').get('src'),
                    'name': video.find('u').string.replace(",", ""),
                    'id': video.find('img', 'hSprite').get('id'),
                    'duration': video.find('b').string
                }
                c += 1
    print('Total videos found: %d' % len(videos))
    return videos
def session(self, session_cookies):
    """Create a new API session with the correct cookies and headers."""
    session = Session()
    # Add cookies and headers to session
    session.cookies.update(session_cookies)
    session.headers = self.headers
    # Return new session object
    return session
def init_session():
    global session
    if session is None:
        session = Session()
        session.headers = {
            'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:80.0) Gecko/20100101 Firefox/82.0'
        }
def _get(self, path: str, params: Optional[dict] = None):
    session = Session()
    session.headers = self.headers
    if params:
        session.params = params
    res = session.get(f"{self.base_endpoint}{path}")
    session.close()
    return res
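An equivalent sketch for the pattern above, with a hypothetical endpoint: passing params per request avoids mutating session state, and a context manager replaces the manual close():

from requests import Session

with Session() as session:  # closed automatically on exit
    session.headers.update({'Accept': 'application/json'})
    res = session.get('https://api.example.com/v1/items', params={'page': 1})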
def _get_session():
    session = Session()
    session.headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/66.0.3359.181 Safari/537.36 ',
    }
    return session
def create_session_using_proxy(proxy: str) -> Session:
    session = Session()
    session.proxies = {"http": proxy, "https": proxy}
    session.headers = {
        "User-Agent": user_agent_rotator.get_random_user_agent()
    }
    return session
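A possible invocation of the factory above; the proxy address is hypothetical, httpbin.org is a stand-in target, and user_agent_rotator is assumed to be configured as in the snippet:

session = create_session_using_proxy("http://127.0.0.1:8080")  # hypothetical proxy
resp = session.get("https://httpbin.org/ip", timeout=10)
print(resp.json())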
def _init_session(self, params: Optional[dict] = None) -> Session:
    session = Session()
    session.headers = {
        'Content-Type': 'application/x-www-form-urlencoded'
    }
    if params:
        session.headers.update(params)
    return session
def _init_session(self, params: Optional[dict] = None) -> Session:
    session = Session()
    session.headers = {
        'Authorization': f'Bearer {self.access_token}',
        'User-Agent': 'amocrm-api-client/1.0',
    }
    if params:
        session.headers.update(params)
    return session
def get_session():
    token = json.loads(requests.post(access_url, data=access_params).text)
    sess = Session()
    sess.headers = {
        'Authorization': 'Bearer ' + token['access_token'],
    }
    return sess
def __init__(self):
    """
    Create an LMS API client, authenticated with the API token from Django settings.
    """
    session = Session()
    session.headers = {"X-Edx-Api-Key": settings.EDX_API_KEY}
    self.client = EdxRestApiClient(self.API_BASE_URL,
                                   append_slash=self.APPEND_SLASH,
                                   session=session)
def request():
    session = Session()
    session.headers = {
        'User-Agent': 'Keycloak scanner - https://github.com/NeuronAddict/keycloak-scanner'
    }
    session.proxies = Request.proxy
    session.verify = Request.verify
    return session
def __init__(self, s: requests.Session = None):
    # Crawl with a Session so a proxy is easy to configure.
    # The Session needs a User-Agent header.
    if s is None:
        s = requests.Session()
        s.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36',
        }
    self._session = s
def sessi(self):
    ses = Session()
    ses.headers = {
        'User-Agent': uag(),
        'Accept-Encoding': 'gzip, deflate',
        'Accept': '*/*',
        'Connection': 'keep-alive'
    }
    ses.proxies = self.is_proxy
    return ses
def createSession(self, cookies):
    '''
    Creates a global session to be used by all requests.
    :param cookies:
    :return:
    '''
    session = Session()
    adapter = adapters.HTTPAdapter(pool_connections=1000, pool_maxsize=5000)
    session.mount('https://', adapter)
    session.headers = self.headers
    session.cookies = cookies
    return session
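For context on the adapter arguments above: in requests' HTTPAdapter, pool_connections is the number of per-host connection pools to cache and pool_maxsize is the number of connections kept per pool, so 1000/5000 is unusually aggressive. A sketch of a more typical sizing for a single API host (values illustrative, not from the original):

from requests import Session, adapters

session = Session()
adapter = adapters.HTTPAdapter(pool_connections=10, pool_maxsize=50)
session.mount('https://', adapter)
session.mount('http://', adapter)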
def getSoup(self, url, referer=''):
    sess = Session()
    if referer == '':
        referer = self.setBaseURL()
    sess.headers = {
        'user-agent': 'Mozilla 5.0',
        'referer': referer,
    }
    req = sess.get(url)
    html = req.text
    return BeautifulSoup(html, 'html.parser')
def translate_google(texts, from_lang='en', to_lang='zh'):
    '''Translate with Google Translator.

    *NOTE*: GOOGLE HAS CANCELED THE FREE TRANSLATION API SERVICE.
    ALTHOUGH THIS FUNCTION WORKS, IT IS FOR STUDY USE ONLY.
    USE AT YOUR OWN RISK.

    This version is modified from https://github.com/mouuff/Google-Translate-API.

    Args:
        texts (list): list of texts to translate
        from_lang (string): input language code
        to_lang (string): output language code

    Returns:
        dict, with key being the input text, value the translated text.
    '''
    before_trans = 'class="t0">'
    link = 'http://translate.google.com/m'
    params = {
        'hl': to_lang,
        'sl': from_lang,
        'q': ''
    }
    sess = Session()
    # (The original also defined an identical, unused `agents` dict; dropped here.)
    sess.headers = {
        'User-Agent': "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.04506.30)"
    }
    ret = {}
    for itm in texts:
        itm = itm.strip()
        if itm:
            params['q'] = itm
            html = sess.get(link, params=params).text
            res = html[html.find(before_trans) + len(before_trans):]
            ret[itm] = res.split("<")[0]
            # Google has canceled the free translation api service; in order
            # not to be banned from the Google server, we pause a bit.
            time.sleep(random.randint(3, 8))
    return ret
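A possible invocation of translate_google, assuming network access and that Google still serves this mobile endpoint (per the docstring's warning, this may break at any time):

result = translate_google(['hello world'], from_lang='en', to_lang='zh')
print(result)  # e.g. {'hello world': '<translated text>'}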
def _setup_session(self):
    s = Session()
    if 'verify' in self.settings:  # default is True
        s.verify = self.settings['verify']
    if 'trust_env' in self.settings:
        s.trust_env = self.settings['trust_env']
    if self._auth is not None:
        s.auth = self._auth
    # Add our headers to requests' default set
    s.headers = merge_setting(self._headers, s.headers,
                              dict_class=CaseInsensitiveDict)
    return s
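merge_setting here is requests' internal helper from requests.sessions (not part of the documented public API), and its first argument wins on key conflicts, which is what lets self._headers override the session defaults. A minimal sketch of the same call in isolation:

from requests import Session
from requests.sessions import merge_setting
from requests.structures import CaseInsensitiveDict

s = Session()
# custom header takes precedence; requests' other defaults are kept
s.headers = merge_setting({'X-Custom': '1'}, s.headers, dict_class=CaseInsensitiveDict)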
def main(url, files, user='', passwd=''):
    # create session
    if 'www.moodle.tum.de' in url:
        session = establish_moodle_session(user, passwd)
    else:
        session = Session()
        session.auth = (user, passwd)
        session.headers = {
            "Accept-Language": "en-US,en;"
        }
    # get file links
    links = get_file_links(session, url, files)
    # download files
    worker = []
    for l in links:
        while active_count() > NUM_THREADS:
            sleep(0.1)
        # Thread.start() returns None, so keep the Thread object itself
        # in the worker list or the joins below never happen.
        t = Thread(target=download_files, args=(session, l))
        t.start()
        worker.append(t)
    [t.join() for t in worker if t]
import json

apply_time = datetime.now().strftime("%Y%m%d%H%M%S%f")[:-3]
session = Session()
opendoor_url = "http://acs.corelines.cn/phone/v1/home/door/initiative/access?" \
               "room_node_id=20232&household_id=9308&apply_time={apply_time}" \
               "&transaction_no=a201709271615285909308&" \
               "code=9278A73B33124F182260994879203CB332A1BDE3AB69CD1CD8073C52A5D010F3" \
               "&door_id=10364&household_token=e056df6d1c90478892b3c934ebae711d".format(apply_time=apply_time)
load_url = "http://acs.corelines.cn/phone/v1/my/version/load"
# Note: this body hardcodes its apply_time; the original called .format(apply_time=...)
# on it, but the string has no {apply_time} placeholder, so that call was a no-op.
load_body = "app_platform=1&apply_time=20170927172443521&" \
            "app_name=acs_phone_android&" \
            "code=B041597F4FC3CF828314B73269B546EC0A2690B03AF21E101B19D6E0CAF88548"
session.headers = {}
acckey_url = "http://acs.corelines.cn/phone/v1/home/visiting/password/add?room_node_id=20232&household_token=e056df6d1c90478892b3c934ebae711d&visiting_time=20170927000000000&household_id=9308&apply_time=20170927173736609&visiting_name=&code=19DE0F7F0F65F10099B0FCB506E652FCE466F89048CD6F502CE287DE55A9D86C&visiting_phone="


def opare_door(url, data):
    print(url)
    return session.post(url, data=data).json()


if __name__ == '__main__':
    # print(json.dumps(opare_door(load_url + "?" + load_body, None), ensure_ascii=False))
    json.dumps(opare_door(acckey_url, None), ensure_ascii=False)
    # print(json.dumps(opare_door(opendoor_url, None), ensure_ascii=False))
from requests import Session

conn = Session()
conn.headers = {
    'X-Auth-Email': "",
    'X-Auth-Key': "",
}
ENDPOINT = "https://api.cloudflare.com/client/v4/"


def get_pages(conn, url):
    result = []
    try:
        resp = conn.get(ENDPOINT + url)
        if resp.status_code != 200:
            return result
        resp = resp.json()
        result.extend(resp['result'])
        total_pages = resp['result_info']['total_pages']
        if total_pages > 1:
            for page in range(2, total_pages + 1):
                resp = conn.get(ENDPOINT + url, params={'page': page})
                if resp.status_code != 200:
                    break
                result.extend(resp.json()['result'])
    except Exception as error:
        print(error)
    return result
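A usage sketch for get_pages, assuming the X-Auth-Email/X-Auth-Key headers above are filled in; "zones" is a standard Cloudflare v4 route:

zones = get_pages(conn, "zones")
print("fetched %d zones" % len(zones))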