def getRandomUserAgent():
    try:
        dbPath = "/usr/share/fake-useragent-db/fake_useragent_db.json"
        if os.path.exists(dbPath):
            # use the local database so fake_useragent does not access the internet
            return fake_useragent.UserAgent(path=dbPath).random
        else:
            return fake_useragent.UserAgent().random
    except fake_useragent.errors.FakeUserAgentError:
        return None
def get_page_av(self, url):
    if WITH_PROXY == 1 and len(Tools.PROXY_LIST) > 0:
        random.seed()
        i = random.randint(0, len(Tools.PROXY_LIST) - 1)
        proxy = urllib.request.ProxyHandler({'https': Tools.PROXY_LIST[i]})
        auth = urllib.request.HTTPBasicAuthHandler()
        opener = urllib.request.build_opener(proxy, auth, urllib.request.HTTPHandler)
        urllib.request.install_opener(opener)
        req = urllib.request.Request(
            url=url,
            headers={'User-Agent': fake_useragent.UserAgent().random},
        )
        try:
            handler = urllib.request.urlopen(req)
        except urllib.error.HTTPError as e:
            if e.code == 404:
                return [], None
            elif e.code == 403:
                logging.warning(f'Bad proxy 403 {Tools.PROXY_LIST[i]}')
                del Tools.PROXY_LIST[i]
                raise
            else:
                raise
        except Exception:
            raise
        page = handler.read()
        doc_youla = lxml.html.document_fromstring(page)
        return doc_youla, page
    else:
        req = urllib.request.Request(
            url=url,
            headers={
                'User-Agent': fake_useragent.UserAgent().random,
                'Cookie': 'location=%7B%22isConfirmed%22%3Atrue%2C%22lat%22%3A52.7585111%2C%22lng%22%3A32.2400969%2C%22r%22%3A5000%2C%22title%22%3A%22%5Cu041a%5Cu043b%5Cu0438%5Cu043d%5Cu0446%5Cu044b%22%2C%22city%22%3Anull%2C%22citySlug%22%3A%22all%22%2C%22cityLocation%22%3Afalse%2C%22pointLocation%22%3Atrue%2C%22defaultRadius%22%3Afalse%7D'
            },
        )
        try:
            handler = urllib.request.urlopen(req)
        except urllib.error.HTTPError as e:
            if e.code == 404:
                return [], None
            else:
                raise
        except Exception:
            raise
        page = handler.read()
        doc_youla = lxml.html.document_fromstring(page)
        return doc_youla, page
def download(url):
    try:
        page = requests.get(url, headers={'User-Agent': fake_useragent.UserAgent().chrome})
    except Exception:
        # one retry after a short pause
        time.sleep(1)
        try:
            page = requests.get(url, headers={'User-Agent': fake_useragent.UserAgent().chrome})
        except Exception:
            print(f'=== download error {url}')
            return None
    if page.status_code != 200:
        print(f'=== page status code {page.status_code} for {url}')
        return None
    return page
def dirbust(self):
    dirs_found = []
    ua = fake_useragent.UserAgent()
    self.headers = {'User-Agent': ua.random}
    remove_new_lines = lambda x: x.replace('\n', '')
    dir_list = list(
        map(remove_new_lines, open(self.wordlist, 'r').readlines()))
    for directory in dir_list:
        search_dir = self.addr + '{}'.format(directory)
        resp = None
        while resp is None:
            try:
                resp = requests.get(search_dir, headers=self.headers)
            except Exception as e:
                print(str(e))
        if resp.status_code in ERROR_CODES:
            dirs_found.append({
                'Page': search_dir,
                'Response': resp.status_code,
                'children': []
            })
    return dirs_found
def getUserAgent():
    ua = fake_useragent.UserAgent(
        fallback='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
    )
    return ua.random
def _user_agents(cls):
    """~fake_useragent.UserAgent: a collection of fake user-agents."""
    filename = 'fake_useragent_{}.json'.format(fake_useragent.VERSION)
    return fake_useragent.UserAgent(
        path=cls._cachefs.getsyspath(filename),
        safe_attrs=['__name__', '__objclass__'])
def login():
    ua = fk.UserAgent()
    se = requests.session()
    se.headers.update({'user-agent': ua.random})
    d = {
        'isValidate': 'true',
        'password': '******',
        'request_form_verifyCode': '',
        'submit': '',
        'username': '******',
    }
    z = 'https://passport.lagou.com/login/login.json'
    ul = 'https://passport.lagou.com/login/login.html'
    r2 = se.get(ul)
    # pull the anti-forgery token and code out of the login page
    re1 = re.compile(r"window.X_Anti_Forge_Token = \'(.*?)';", re.S)
    re2 = re.compile(r"window.X_Anti_Forge_Code = \'(.*?)';", re.S)
    h1 = re.search(re1, r2.text).group(1)
    h2 = re.search(re2, r2.text).group(1)
    se.headers.update({'X-Anit-Forge-Token': h1})
    se.headers.update({'X-Anit-Forge-Code': h2})
    se.headers.update({'Referer': ul})
    print('se.headers=', se.headers)
    response = se.post(z, data=d)
    print(response.content.decode('utf-8'))
def parse_table(URL) -> list:
    user = fake_useragent.UserAgent().random
    # URL = 'https://football24.ua/ispaniya_tables_tag50823/'
    HEADERS = {'User-Agent': user}
    response = requests.get(URL, headers=HEADERS)
    soup = BeautifulSoup(response.content, 'lxml')
    block = soup.find('tbody')
    rows = block.find_all('tr')
    teams = list()
    for row in rows:
        values = row.find_all('td')
        values = [value.get_text(strip=True) for value in values]
        values = values[0:6] + values[14:16]
        pos, name = re.findall(r'(\d{1,2})(\w+).*', values[0])[0]
        teams.append({
            'logo': 0,
            'pos': pos,
            'name': name,
            'played': values[2],
            'won': values[3],
            'draw': values[4],
            'lost': values[5],
            'for': values[6],
            'against': values[7],
            'points': values[1]
        })
    return teams
def time_proxy(ip_addr, port, proxy_user=None, proxy_pass=None,
               url='https://www.google.com', timeout=10):
    success_counts = 0
    start = time.time()
    ua = fake_useragent.UserAgent()
    pull_proxies = build_proxies(ip_addr, port, proxy_user, proxy_pass)
    for i in range(TOTAL_TRIES_PER_URL):
        try:
            r = requests.get(url, proxies=pull_proxies,
                             headers={'User-agent': ua.chrome}, timeout=timeout)
            if r.status_code == 200:
                success_counts += 1
        except Exception as e:
            print(e)
    print('for proxy {}'.format(pull_proxies))
    print('total time {} for visiting {} times'.format(time.time() - start, TOTAL_TRIES_PER_URL))
    print('success rate = {}'.format(success_counts / TOTAL_TRIES_PER_URL))
def __init__(self, login=None, password=None, timeout=2, user=None, link=None, auto=True):
    self.login = login
    self.password = password
    self.timeout = timeout
    if self.login is not None:
        Data.update_login_and_password(login=login)
    if self.password is not None:
        Data.update_login_and_password(password=password)
    if user is None:
        user = fake_useragent.UserAgent().random
    self.user = user
    if link is None:
        link = "https://ok.ru"
    self.link = link
    self.session = None
    self.header = {
        'user-agent': self.user
    }
    self.params = {
        'cmd': 'AnonymLogin',
        'st.cmd': 'anonymLogin'
    }
    if auto:
        self.create_session()
        self.save_session()
def parse_table(URL) -> list:
    user = fake_useragent.UserAgent().random
    HEADERS = {'User-Agent': user}
    response = requests.get(URL, headers=HEADERS)
    soup = BeautifulSoup(response.content, 'lxml')
    block = soup.find('table', class_='standings-table')
    block = block.find('tbody')
    rows = block.find_all('tr')
    teams = list()
    for row in rows:
        values = row.findAll('td')
        values = [value.get_text(strip=True) for value in values]
        teams.append({
            'logo': 0,
            'pos': values[0],
            'name': values[2],
            'played': values[3],
            'won': values[4],
            'draw': values[5],
            'lost': values[6],
            'for': values[7],
            'against': values[8],
            'points': values[9]
        })
    return teams
def get_html(url, url_proxy=None):
    # header = {
    #     'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0',
    #     'Accept-Language': 'en-US,en;q=0.5',
    #     'Accept-Encoding': 'gzip, deflate, br',
    #     'Connection': 'keep-alive'
    # }
    # Random User-Agent
    # header = {
    #     'Host': 'www.dcz.gov.ua',
    #     'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0',
    #     'Accept': '*/*',
    #     'Accept-Language': 'en-US,en;q=0.5',
    #     'Accept-Encoding': 'gzip, deflate, br',
    #     'Referer': 'https://www.dcz.gov.ua/userSearch/vacancy?koatuuVal=101010510100000&koatuuLab=%D0%92%D1%96%D0%BD%D0%BD%D0%B8%D1%86%D1%8F&regionID=101010500000000&activePage=2&itemsPerPage=15',
    #     'Content-Type': 'application/json; charset=UTF-8',
    #     'Origin': 'https://www.dcz.gov.ua',
    #     'Content-Length': '97',
    #     'Connection': 'keep-alive',
    #     'Cookie': 'UserPreference_OfficeSuite=MS; has_js=1'
    # }
    ua = fake_useragent.UserAgent()
    user = ua.random
    header = {'User-Agent': str(user)}
    params = {"StartRowIndex": 15, "MaximumRows": "15",
              "RegionID": "101010500000000", "KoatuuID": "101010510100000"}
    r = requests.post(url, params=params, headers=header)
    print(r.text)
    return r.text
def get_page(url: str):
    '''Returns the page fetched from the given url.'''
    user_agent = fake_useragent.UserAgent()
    user = user_agent.random
    headers = {'User-Agent': str(user)}
    response = requests.get(url, headers=headers)
    return response
def __init__(self, burst, redis_class):
    try:
        # fake_useragent may fail to fetch or cache its user-agent list
        ua = fake_useragent.UserAgent()
        self.ua_list = [ua.random for i in range(300)]
    except Exception as e:
        self.ua_list = [
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0",
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36 QIHU 360EE"
        ]
    self.cookies_pool = [
        "suid=4488859312; __admx_track_id=bvRi7-b_5hNR95FRxYkgzQ; __admx_track_id.sig=n3iY8rS_b02OZw4dpBWfh6VeTNA; __trackId=154588196450645; __uuid=115458819649530.a187a; _ga=GA1.2.1818966348.1545881966; agreedUserPrivacy=1; __chat_udid=ae091516-ee0b-4a34-96e2-539c81faa044; __s=f2cqnm56ml93p8vm56sbu7ui80; Hm_lvt_5a727f1b4acc5725516637e03b07d3d2=1553168304,1553220219,1553222217; __city=chongqing; __area2=tongwei; _gid=GA1.2.989251167.1553479553; _auth_redirect=http%3A%2F%2Fchongqing.baixing.com%2Fershoufang%2F%3Fsrc%3Dtopbar; __sense_session_pv=1; Hm_lpvt_5a727f1b4acc5725516637e03b07d3d2=1553517296; _gat=1"
    ]
    self.redis_con = redis.ConnectionPool(host="xxxxxxx", port=6379, db=12)
    self.r_db = redis.StrictRedis(connection_pool=self.redis_con)
    self.proxy_list = []
    self.burst = burst
    self.redis_class = redis_class
    self.now = dt.now()
    self.today = "{0}-{1}-{2}".format(self.now.year, self.now.month, self.now.day)
def __init__(self, url):
    self.agent = fake_useragent.UserAgent()
    self.head = self.agent.random
    self.header = {'User-Agent': self.head}
    self.reqt = requests.get(url, headers=self.header, timeout=None)
    self.soup = BeautifulSoup(self.reqt.text, "lxml")  # making soup
def save_photo(user_id, photo_id, photo_link):
    global PATH_TO_PHOTOS, PATH_TO_PHOTOS_LINKS, PROXIES_LIST
    photo_link = photo_link.replace('\n', '')
    photo_big_link = photo_link.replace('800x800', '1200x1200')
    user_agent = fake_useragent.UserAgent().random
    proxy = {'https': random.choice(PROXIES_LIST)}
    try:
        to_save = requests.get(
            url=photo_big_link,
            # headers=user_agent,
            proxies=proxy)
        if to_save.status_code == 404:
            # fall back to the original-size photo
            to_save = requests.get(
                url=photo_link,
                # headers=user_agent,
                proxies=proxy)
    except (requests.ConnectionError, requests.Timeout, requests.ConnectTimeout):
        print('\tPassed.')
        return 1
    photo_name = PATH_TO_PHOTOS + '{}_{}.jpg'.format(user_id, photo_id)  # user_id + photo number
    out = open(photo_name, "wb")
    out.write(to_save.content)
    out.close()
    write_csv(PATH_TO_PHOTOS_LINKS, [[user_id, photo_id, photo_link]])
    print('\tSaved.')
    return 0
def parse(self, artist_name):
    """Parses concert.ua to check whether this artist has an event listed.

    :param artist_name:
    :return: (event_name, event_link) tuple, or False if nothing is found
    """
    ua = fake_useragent.UserAgent()
    headers = {'User-Agent': ua.chrome}
    artist_name = artist_name.lower()
    site = requests.get('https://concert.ua/uk/event/' + artist_name, headers=headers)
    if site.status_code != 200:
        return False
    site.encoding = 'utf-8'
    bs = bs4.BeautifulSoup(site.text, "html.parser")
    event_name = bs.select('.event-main-info__name-value')
    if event_name:
        event_name = event_name[0]
        return event_name.text.strip(), \
            'https://concert.ua/uk/event/' + artist_name
    return False
def parse_table(URL) -> list:
    user = fake_useragent.UserAgent().random
    # URL = 'https://football24.ua/ispaniya_tables_tag50823/'
    HEADERS = {'User-Agent': user}
    response = requests.get(URL, headers=HEADERS)
    soup = BeautifulSoup(response.content, 'lxml')
    block = soup.find('tbody', class_='tableBodyContainer')
    rows = block.find_all('tr')
    teams = list()
    for row in rows:
        if row.get('class') != ['expandable']:
            # logo = _get_photo(row.find('img').get('src'))
            pos = row.find('span', class_='value').get_text(strip=True)
            name = row.find('span', class_='long').get_text(strip=True)
            points = row.findAll('td')
            points = [point.get_text(strip=True) for point in points]
            points = points[3:9] + points[10:11]
            teams.append({
                'logo': 0,
                'pos': pos,
                'name': name,
                'played': points[0],
                'won': points[1],
                'draw': points[2],
                'lost': points[3],
                'for': points[4],
                'against': points[5],
                'points': points[6]
            })
    return teams
def makeRequest(self, url):
    try:
        ua = fake_useragent.UserAgent(verify_ssl=False)
        headers = {
            'User-Agent': ua.random,
            'TE': 'Trailers',
            # 'Referer': url,
            'Cookie': 'x-zp-client-id=604da560-b85b-4bb9-8490-ff855a04d300; sts_deviceid=1760f31d861fa-0efc091273c9f28-4c3f2779-1049088-1760f31d863ff; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%22662040894%22%2C%22first_id%22%3A%221760f31d87d9-04919f4ca1beeb8-4c3f2779-1049088-1760f31d8804f%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%2C%22%24latest_refe…rt=84a950e77e054854b4d2f9d90826d063; acw_tc=2760826a16065748843918699e79278d6f3843d7bbcabf231429e46fa643ac; ssxmod_itna=eqjxBDcDuQ0=IRDl4iuiFD0ii=eiIhX5PENoeD/KAmDnqD=GFDK40ooO3wQoGC80oTwWYRW8G44ebPn03pQolhEjvF2Yx0aDbqGk3tc4ii9DCeDIDWeDiDG4Gml4GtDpxG=yDm4i3jxGeDe2IODY5DhxDC00PDwx0CjEiKWRFGCa71=tv4xt0DjxG1N40HWi3AoFSEq0H3Ix0k040Oya5kRcYDU74PElrd1gPDmxdDyPE=DiPkqmhUOD0tjoQxWQ=DBO6eZmr3ZAiiQixPeWR5T/rK/ih43BTFZCrKDDcl7YD===; ssxmod_itna2=eqjxBDcDuQ0=IRDl4iuiFD0ii=eiIhX5PENG9t5DRO1DGNewQGaKKjk5txKdMP08DedwD==='
        }
        proxy = Proxy.ipProxy.IpProxy(self.proxyUrl)
        ipList = proxy.getIpList()
        ip = random.choice(ipList)
        proxies = {
            'http': ip,
        }
        response = self.session.get(url, timeout=30, headers=headers, proxies=proxies, verify=False)
        response.raise_for_status()
        return response.text
    except Exception as ex:
        print(ex)
        return None
def get_html(self):
    for i in range(1, 3423):
        print('Starting first-level crawl: fetching page {}'.format(i))
        url = self.url.format(i)
        headers = {'User-Agent': fake_useragent.UserAgent().random}
        try:
            if api_settings.USE_PROXY_TO_XICI:
                proxies = self.get_random_proxy()
                res = requests.get(url, headers=headers,
                                   timeout=api_settings.TIME_OUT, proxies=proxies)
            else:
                res = requests.get(
                    url,
                    headers=headers,
                    timeout=api_settings.TIME_OUT,
                )
        except Exception as e:
            print(e)
            continue
        res.encoding = 'utf-8'
        html = res.content
        self.parse_html(html)
def logoin(url):
    # Randomly generated User-Agent
    header = {"User-Agent": fake_useragent.UserAgent().random}
    # Username and password for yaozh.com, plus the site's own verification fields: formhash, backurl
    login_data = {
        "username": "******",
        "pwd": "axxcsa",
        "formhash": "1766301C66",
        "backurl": "https%3A%2F%2Fwww.yaozh.com%2F"
    }
    # POST parameters must be url-encoded, as the HTTP spec requires
    login_data_encode = urllib.parse.urlencode(login_data)
    request = urllib.request.Request(url, data=login_data_encode.encode("utf-8"), headers=header)
    # CookieJar stores the cookies
    cookjar = http.cookiejar.CookieJar()
    # cookie handler
    cookie_hanler = urllib.request.HTTPCookieProcessor(cookjar)
    # cookie opener
    cookie_opener = urllib.request.build_opener(cookie_hanler)
    # Log in to yaozh.com; on success the cookie is saved in memory automatically
    cookie_opener.open(request)
    # At this point we are logged in and the cookie is stored, so go straight to the member centre page
    center_url = "https://www.yaozh.com/member/"  # yaozh.com member centre page
    center_request = urllib.request.Request(center_url, headers=header)
    # Note: no cookie appears explicitly in this code because it is held in memory by cookie_opener,
    # which attaches it automatically when open() sends the request
    response = cookie_opener.open(center_request)
    htmlsource = response.read().decode("utf-8")
    with open("yaozhiwang.html", "w", encoding="utf-8") as f:
        f.write(htmlsource)
def __init__(self):
    try:
        # fake_useragent may fail to fetch or cache its user-agent list
        ua = fake_useragent.UserAgent()
        self.ua_list = [ua.random for i in range(300)]
    except Exception as e:
        self.ua_list = [
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0",
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36 QIHU 360EE"
        ]
    self.redis_con = redis.ConnectionPool(host="xxxxx", port=6379, db=12)
    self.r_db = redis.StrictRedis(connection_pool=self.redis_con)
    self.redis_class = "58"
    self.change_cookie = False
    self.cookies_pool = [
        "f=n; commontopbar_new_city_info=2%7C%E4%B8%8A%E6%B5%B7%7Csh; userid360_xml=D22CC785632D31027525A6084FD5335E; time_create=1556176801575; commontopbar_ipcity=cd%7C%E6%88%90%E9%83%BD%7C0; id58=c5/njVyZ0qAqX2HGAyQlAg==; wmda_uuid=bcb64a473057c252e44db5957546d796; wmda_new_uuid=1; wmda_visited_projects=%3B6333604277682; 58tj_uuid=9a82548d-8cdc-40d8-bfc7-12b5d524fed3; new_uv=1; als=0; xxzl_deviceid=nTJEQceHMJYfZXPrsp57h5CoHNWiaD958mj%2Bfrs5Cp1LkUv11%2F%2Fe2jfSyI6Sjx3c; JSESSIONID=24F6C1ADB25A360FB1965102F7A8DBB9; xzfzqtoken=g0yphhiwlkQiBsgXOCR3CDu2LCBW1EA6wWVGuFxDLKMYuZo3deea12niiQgGVR%2Fkin35brBb%2F%2FeSODvMgkQULA%3D%3D"
    ]
    self.error_url = None
    self.burst = 50
    self.sleep_time_min = 5
    self.sleep_time_max = 12
def crawler(url, newHeader={}, param=None):
    header = {'User-Agent': fake_useragent.UserAgent().random}
    header.update(newHeader)
    response = requests.get(url, headers=header, params=param)
    response.raise_for_status()
    response.encoding = response.apparent_encoding
    return response.text
def fetch(url):
    data = ""
    r = ""
    # p = current_process()
    # if p.name != 'MainProcess' and p._identity[0] and os.getpid():
    #     print('process counter:', p._identity[0], 'pid:', os.getpid())
    asession = HTMLSession()
    asession.headers.update({'User-Agent': fake_useragent.UserAgent().random})
    asession.max_redirects = 60
    # parsing through a proxy
    # proxy = {'http': 'http://' + choice(read_file("proxies.txt", "\n")) + '/'}
    # asession.proxies.update(proxy)
    unf = uniform(1, 6)
    time.sleep(unf)
    try:
        r = asession.request('GET', url, allow_redirects=False)
    except Exception as e:
        print('Failed to get page %s. Reason: %s' % (url, e))
        asession.close()
        return data
    try:
        if r.status_code == 200:
            r.html.render(sleep=2, timeout=200)
            data = r.html
            asession.close()
            return data
        else:
            asession.close()
            return data
    except Exception as e:
        print('Failed to render page %s. Reason: %s' % (url, e))
        asession.close()
        return data
def util(district_id, date):
    temp_user_agent = fake_useragent.UserAgent(verify_ssl=False)
    browser_header = {'User-Agent': temp_user_agent.random}
    URL_DIS_ID_DATE = "https://cdn-api.co-vin.in/api/v2/appointment/sessions/public/calendarByDistrict?district_id={}&" \
                      "date={}".format(district_id, date)
    response = requests.get(URL_DIS_ID_DATE, headers=browser_header)
    resp_json = response.json()["centers"]
    if len(resp_json) == 0:
        return "No Data Found"
    results = []
    for item in resp_json:
        result = dict()
        result['name'] = item['name']
        result['address'] = item['address']
        result['block'] = item['block_name']
        result['pincode'] = item['pincode']
        result['fee'] = item['fee_type']
        result['avl'] = item['sessions'][0]['available_capacity']
        result['min_age'] = item['sessions'][0]['min_age_limit']
        result['vaccine'] = item['sessions'][0]['vaccine']
        result['avl_1'] = item['sessions'][0]['available_capacity_dose1']
        result['avl_2'] = item['sessions'][0]['available_capacity_dose2']
        results.append(result)
    return json.dumps({'results': results}, indent=4)
def main():
    # Global variables
    global reverse_search
    global ua
    # Fetch some User Agent strings from the internet
    ua = fake_useragent.UserAgent()
    # Open our config file
    with open('config.json') as config_file:
        config = json.load(config_file)
    # Loop through the web pages listed in the config file
    for i in config['Sites']:
        try:
            reverse_search = config['Sites'][i]['Reverse search']
        except KeyError:
            reverse_search = False
        result = checker(config['Sites'][i]['URL'], config['Sites'][i]['Keyword'])
        if result == ':)':
            send_push(
                "Check out " + i,
                config['Sites'][i]['URL'],
                config['Settings']['Pushover token'],
                config['Settings']['Pushover key'])
def parse_table(URL) -> list:
    user = fake_useragent.UserAgent().random
    # URL = 'https://football24.ua/ispaniya_tables_tag50823/'
    HEADERS = {
        'User-Agent': user
    }
    response = requests.get(URL, headers=HEADERS)
    soup = BeautifulSoup(response.content, 'lxml')
    block = soup.find('table', class_='leaguetable sortable table detailed-table')
    block = block.find('tbody')
    rows = block.find_all('tr')
    teams = list()
    for row in rows:
        values = row.findAll('td')
        values = [value.get_text(strip=True) for value in values]
        teams.append({
            'logo': 0,
            'pos': values[0],
            'name': values[2],
            'played': values[3],
            'won': values[4],
            'draw': values[5],
            'lost': values[6],
            'for': values[7],
            'against': values[8],
            'points': values[10]
        })
    return teams
def get_last_lotto_num(params=None):
    url = "https://dhlottery.co.kr/gameResult.do?method=byWin"
    ua = fake_useragent.UserAgent()
    hdr = {'User-Agent': str(ua.random)}
    req = requests.get(url, params=params, headers=hdr)
    print('[get_html] url:', req.url)
    print('[get_html] status_code:', req.status_code)
    if req.status_code != 200:
        print("Error!")
        sys.exit(1)
    soup = BeautifulSoup(req.text, 'html.parser')
    last_round = soup.find('div', {'class': 'win_result'}).h4.strong.text
    last_round_date_check1 = soup.find('p', {'class': 'desc'}).text.split("(")
    last_round_date_check2 = last_round_date_check1[1].split(")")
    last_round_date = last_round_date_check2[0]
    last_round_num_check = soup.find('div', {
        'class': 'num win'
    }).p.text.split('\n')
    last_round_num = []
    for i in range(0, 7):
        if i == 0 or i == 7:
            continue
        last_round_num.append(int(last_round_num_check[i]))
    last_round_bonus_num = soup.find('div', {'class': 'num bonus'}).p.text
    return last_round, last_round_date, last_round_num, last_round_bonus_num
def time_proxy(ip_addr, port, proxy_user=None, proxy_pass=None,
               url='https://www.redfin.com', timeout=10, TOTAL_TRIES_PER_URL=2):
    """Check all proxies in proxy.csv for the ability to connect to redfin.com.

    Each proxy requests the site twice, and only those with a 100% success
    rate (2/2) are kept.
    """
    success_counts = 0
    ua = fake_useragent.UserAgent()
    pull_proxies = construct_proxy(ip_addr, port)
    for i in range(TOTAL_TRIES_PER_URL):
        try:
            r = requests.get(url, proxies=pull_proxies,
                             headers={'User-agent': ua.chrome}, timeout=timeout)
            if r.status_code == 200:
                success_counts += 1
        except Exception:
            pass
    print('for proxy {}'.format(pull_proxies))
    print('success rate = {}'.format(success_counts / TOTAL_TRIES_PER_URL))
    return success_counts / TOTAL_TRIES_PER_URL
def _random_headers(self):
    headers = {
        "dnt": "1",
        "user-agent": fake_useragent.UserAgent().random,
        "origin": "https://www.bing.com",
        "referer": "https://www.bing.com/translator",
    }
    return headers