Example #1
 def getRandomUserAgent():
     try:
         dbPath = "/usr/share/fake-useragent-db/fake_useragent_db.json"
         if os.path.exists(dbPath):
             # we don't want fake_useragent to access the internet
             return fake_useragent.UserAgent(path=dbPath).random
         else:
             return fake_useragent.UserAgent().random
     except fake_useragent.errors.FakeUserAgentError:
         return None
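# A minimal, hypothetical caller sketch (not part of the original source):
# getRandomUserAgent() returns None when fake_useragent fails, so a static
# fallback string (assumed here for illustration) can be substituted before
# building request headers.
def buildRequestHeaders():
    ua = getRandomUserAgent()
    if ua is None:
        # assumed fallback User-Agent string, for illustration only
        ua = ("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
              "(KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36")
    return {"User-Agent": ua}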
 def get_page_av(self, url):
     if WITH_PROXY == 1 and len(Tools.PROXY_LIST) > 0:
         random.seed()
         i = random.randint(0, len(Tools.PROXY_LIST) - 1)
         proxy = urllib.request.ProxyHandler({'https': Tools.PROXY_LIST[i]})
         auth = urllib.request.HTTPBasicAuthHandler()
         opener = urllib.request.build_opener(proxy, auth,
                                              urllib.request.HTTPHandler)
         urllib.request.install_opener(opener)
         req = urllib.request.Request(
             url=url,
             headers={'User-Agent': fake_useragent.UserAgent().random},
         )
         try:
             handler = urllib.request.urlopen(req)
         except urllib.error.HTTPError as e:
             if e.code == 404:
                 return [], None
             elif e.code == 403:
                 logging.warning(f'Bad proxy 403 {Tools.PROXY_LIST[i]}')
                 del Tools.PROXY_LIST[i]
                 raise
             else:
                 raise
         except Exception:
             raise
         page = handler.read()
         doc_youla = lxml.html.document_fromstring(page)
         return doc_youla, page
     else:
         req = urllib.request.Request(
             url=url,
             headers={
                 'User-Agent':
                 fake_useragent.UserAgent().random,
                 'Cookie':
                 'location=%7B%22isConfirmed%22%3Atrue%2C%22lat%22%3A52.7585111%2C%22lng%22%3A32.2400969%2C%22r%22%3A5000%2C%22title%22%3A%22%5Cu041a%5Cu043b%5Cu0438%5Cu043d%5Cu0446%5Cu044b%22%2C%22city%22%3Anull%2C%22citySlug%22%3A%22all%22%2C%22cityLocation%22%3Afalse%2C%22pointLocation%22%3Atrue%2C%22defaultRadius%22%3Afalse%7D'
             },
         )
         try:
             handler = urllib.request.urlopen(req)
         except urllib.error.HTTPError as e:
             if e.code == 404:
                 return [], None
             else:
                 raise
         except Exception:
             raise
         page = handler.read()
         doc_youla = lxml.html.document_fromstring(page)
         return doc_youla, page
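# A minimal, hypothetical retry sketch (not part of the original source): the
# 403 branch above removes the bad proxy from Tools.PROXY_LIST and re-raises,
# so a caller can simply retry and a different proxy is picked next time.
# The names fetch_with_retries, scraper and max_retries are assumptions.
def fetch_with_retries(scraper, url, max_retries=3):
    for _ in range(max_retries):
        try:
            return scraper.get_page_av(url)
        except urllib.error.HTTPError:
            continue  # the bad proxy was already dropped; try another one
    return [], None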
Example #3
def download(url):
    try:
        page = requests.get(url, headers={'User-Agent': fake_useragent.UserAgent().chrome})
    except requests.RequestException:
        time.sleep(1)
        try:
            page = requests.get(url, headers={'User-Agent': fake_useragent.UserAgent().chrome})
        except requests.RequestException:
            print(f'=== download error {url}')
            return None
    if page.status_code != 200:
        print(f'=== page status code {page.status_code} for {url}')
        return None
    return page
Example #4
    def dirbust(self):
        dirs_found = []
        ua = fake_useragent.UserAgent()

        self.headers = {'User-Agent': ua.random}

        with open(self.wordlist, 'r') as wordlist_file:
            dir_list = [line.replace('\n', '') for line in wordlist_file]

        for directory in dir_list:
            search_dir = self.addr + '{}'.format(directory)
            resp = None
            while resp is None:
                try:
                    resp = requests.get(search_dir, headers=self.headers)
                except Exception as e:
                    print(str(e))

            if resp.status_code in ERROR_CODES:
                dirs_found.append({
                    'Page': search_dir,
                    'Response': resp.status_code,
                    'children': []
                })

        return dirs_found
Example #5
def getUserAgent():
    ua = fake_useragent.UserAgent(
        fallback='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
    )

    return ua.random
Example #6
 def _user_agents(cls):
     """~fake_useragent.UserAgent: a collection of fake user-agents.
     """
     filename = 'fake_useragent_{}.json'.format(fake_useragent.VERSION)
     return fake_useragent.UserAgent(
         path=cls._cachefs.getsyspath(filename),
         safe_attrs=['__name__', '__objclass__'])
Example #7
def login():
    ua = fk.UserAgent()
    se = requests.session()

    se.headers.update({'user-agent': ua.random})
    d = {
        'isValidate': 'true',
        'password': '******',
        'request_form_verifyCode': '',
        'submit': '',
        'username': '******',
    }

    z = 'https://passport.lagou.com/login/login.json'
    ul = 'https://passport.lagou.com/login/login.html'
    r2 = se.get(ul)
    re1 = re.compile(r"window.X_Anti_Forge_Token = \'(.*?)';", re.S)
    re2 = re.compile(r"window.X_Anti_Forge_Code = \'(.*?)';", re.S)
    h1 = re.search(re1, r2.text).group(1)
    h2 = re.search(re2, r2.text).group(1)
    se.headers.update({'X-Anit-Forge-Token': h1})
    se.headers.update({'X-Anit-Forge-Code': h2})
    se.headers.update({'Referer': ul})
    print('se.headers=', se.headers)
    response = se.post(z, data=d)
    print(response.content.decode('utf-8'))
Example #8
def parse_table(URL) -> list:
    user = fake_useragent.UserAgent().random
    # URL = 'https://football24.ua/ispaniya_tables_tag50823/'
    HEADERS = {'User-Agent': user}
    response = requests.get(URL, headers=HEADERS)
    soup = BeautifulSoup(response.content, 'lxml')
    block = soup.find('tbody')
    rows = block.find_all('tr')
    teams = list()
    for row in rows:
        values = row.find_all('td')
        values = [value.get_text(strip=True) for value in values]
        values = values[0:6] + values[14:16]
        pos, name = re.findall(r'(\d{1,2})(\w+).*', values[0])[0]
        teams.append({
            'logo': 0,
            'pos': pos,
            'name': name,
            'played': values[2],
            'won': values[3],
            'draw': values[4],
            'lost': values[5],
            'for': values[6],
            'against': values[7],
            'points': values[1]
        })
    return teams
Example #9
def time_proxy(ip_addr,
               port,
               proxy_user=None,
               proxy_pass=None,
               url='https://www.google.com',
               timeout=10):
    success_counts = 0
    start = time.time()
    ua = fake_useragent.UserAgent()

    pull_proxies = build_proxies(ip_addr, port, proxy_user, proxy_pass)
    for i in range(TOTAL_TRIES_PER_URL):
        try:
            r = requests.get(url,
                             proxies=pull_proxies,
                             headers={'User-agent': ua.chrome},
                             timeout=timeout)
            if r.status_code == 200:
                success_counts += 1
        except Exception as e:
            print(e)

    print('for proxy {}'.format(pull_proxies))
    print('total time {} for visiting {} times'.format(time.time() - start,
                                                       TOTAL_TRIES_PER_URL))
    print('success rate = {}'.format(success_counts / TOTAL_TRIES_PER_URL))
Example #10
    def __init__(self, login=None, password=None, timeout=2, user=None, link=None, auto=True):

        self.login = login
        self.password = password
        self.timeout = timeout

        if self.login is not None:
            Data.update_login_and_password(login=login)

        if self.password is not None:
            Data.update_login_and_password(password=password)

        if user is None:
            user = fake_useragent.UserAgent().random
        self.user = user

        if link is None:
            link = "https://ok.ru"
        self.link = link
        self.session = None

        self.header = {
            'user-agent': self.user
        }
        self.params = {
            'cmd': 'AnonymLogin',
            'st.cmd': 'anonymLogin'
        }
        if auto:
            self.create_session()
            self.save_session()
Example #11
def parse_table(URL) -> list:
    user = fake_useragent.UserAgent().random
    HEADERS = {'User-Agent': user}
    response = requests.get(URL, headers=HEADERS)
    soup = BeautifulSoup(response.content, 'lxml')
    block = soup.find('table', class_='standings-table')
    block = block.find('tbody')
    rows = block.find_all('tr')
    teams = list()
    for row in rows:
        values = row.find_all('td')
        values = [value.get_text(strip=True) for value in values]
        teams.append({
            'logo': 0,
            'pos': values[0],
            'name': values[2],
            'played': values[3],
            'won': values[4],
            'draw': values[5],
            'lost': values[6],
            'for': values[7],
            'against': values[8],
            'points': values[9]
        })
    return teams
Example #12
def get_html(url, url_proxy=None):
    # header = {
    #     'User - Agent': 'Mozilla / 5.0(X11; Ubuntu; Linux x86_64; rv: 68.0) Gecko / 20100101 Firefox / 68.0',
    #     'Accept - Language': 'en - US, en; q = 0.5',
    #     'Accept - Encoding': 'gzip, deflate, br',
    #     'Connection': 'keep - alive'
    # }
    # Random User-Agent

    # header = {
    #     'Host': 'www.dcz.gov.ua',
    #     'User - Agent': 'Mozilla / 5.0(X11; Ubuntu; Linux x86_64; rv: 68.0) Gecko / 20100101 Firefox / 68.0',
    #     'Accept': '* / *',
    #     'Accept - Language': 'en - US, en; q = 0.5',
    #     'Accept - Encoding': 'gzip, deflate, br',
    #     'Referer': 'https: // www.dcz.gov.ua / userSearch / vacancy?koatuuVal = 101010510100000 & koatuuLab = % D0 % 92 % D1 % 96 % D0 % BD % D0 % BD % D0 % B8 % D1 % 86 % D1 % 8 F & regionID = 101010500000000 & activePage = 2 & itemsPerPage = 15',
    #     'Content - Type': 'application / json; charset = UTF - 8',
    #     'Origin': 'https: // www.dcz.gov.ua',
    #     'Content - Length': '97',
    #     'Connection': 'keep - alive',
    #     'Cookie': 'UserPreference_OfficeSuite = MS; has_js = 1'
    # }

    ua = fake_useragent.UserAgent()
    user = ua.random
    header = {'User-Agent': str(user)}

    params = {"StartRowIndex": 15, "MaximumRows": "15", "RegionID": "101010500000000", "KoatuuID": "101010510100000"}

    r = requests.post(url, params=params, headers=header)

    print(r.text)
    return r.text
Example #13
def get_page(url: str):
    '''Return the page for the url passed to it.'''
    user_agent = fake_useragent.UserAgent()
    user = user_agent.random
    headers = {'User-Agent': str(user)}
    response = requests.get(url, headers=headers)
    return response
Example #14
 def __init__(self, burst, redis_class):
     try:
         """
         fake_useragent may fail to download and cache its user-agent data
         """
         ua = fake_useragent.UserAgent()
         self.ua_list = [ua.random for i in range(300)]
     except Exception as e:
         self.ua_list = [
             "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0",
             "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36",
             "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0",
             "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36 QIHU 360EE"
         ]
     self.cookies_pool = [
         "suid=4488859312; __admx_track_id=bvRi7-b_5hNR95FRxYkgzQ; __admx_track_id.sig=n3iY8rS_b02OZw4dpBWfh6VeTNA; __trackId=154588196450645; __uuid=115458819649530.a187a; _ga=GA1.2.1818966348.1545881966; agreedUserPrivacy=1; __chat_udid=ae091516-ee0b-4a34-96e2-539c81faa044; __s=f2cqnm56ml93p8vm56sbu7ui80; Hm_lvt_5a727f1b4acc5725516637e03b07d3d2=1553168304,1553220219,1553222217; __city=chongqing; __area2=tongwei; _gid=GA1.2.989251167.1553479553; _auth_redirect=http%3A%2F%2Fchongqing.baixing.com%2Fershoufang%2F%3Fsrc%3Dtopbar; __sense_session_pv=1; Hm_lpvt_5a727f1b4acc5725516637e03b07d3d2=1553517296; _gat=1"
     ]
     self.redis_con = redis.ConnectionPool(host="xxxxxxx", port=6379, db=12)
     self.r_db = redis.StrictRedis(connection_pool=self.redis_con)
     self.proxy_list = []
     self.burst = burst
     self.redis_class = redis_class
     self.now = dt.now()
     self.today = "{0}-{1}-{2}".format(self.now.year, self.now.month,
                                       self.now.day)
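# A minimal, hypothetical helper (not part of the original class) showing how a
# pre-built ua_list and cookies_pool like the ones above might be consumed per
# request, so fake_useragent is only queried once at start-up. Assumes the
# `random` module is imported.
def pick_headers(ua_list, cookies_pool):
    return {
        "User-Agent": random.choice(ua_list),
        "Cookie": random.choice(cookies_pool),
    }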
Example #15
    def __init__(self, url):

        self.agent = fake_useragent.UserAgent()
        self.head = self.agent.random
        self.header = {'User-Agent': self.head}
        self.reqt = requests.get(url, headers=self.header, timeout=None)
        self.soup = BeautifulSoup(self.reqt.text, "lxml")  # making soup
Example #16
def save_photo(user_id, photo_id, photo_link):
    global PATH_TO_PHOTOS, PATH_TO_PHOTOS_LINKS, PROXIES_LIST
    photo_link = photo_link.replace('\n', '')
    photo_big_link = photo_link.replace('800x800', '1200x1200')
    user_agent = fake_useragent.UserAgent().random
    proxy = {'https': random.choice(PROXIES_LIST)}
    try:
        to_save = requests.get(
            url=photo_big_link,
            # headers=user_agent,
            proxies=proxy)
        if to_save.status_code == 404:
            to_save = requests.get(
                url=photo_link,
                # headers=user_agent,
                proxies=proxy)
    except (requests.ConnectionError, requests.Timeout, requests.ConnectTimeout):
        print('\tPassed.')
        return 1
    photo_name = PATH_TO_PHOTOS + '{}_{}.jpg'.format(
        user_id, photo_id)  # user_id + photo number
    with open(photo_name, "wb") as out:
        out.write(to_save.content)
    write_csv(PATH_TO_PHOTOS_LINKS, [[user_id, photo_id, photo_link]])
    print('\tSaved.')
    return 0
Example #17
    def parse(self, artist_name):
        """Parses concert.ua checking availability performance of this artist

        :param artist_name:
        :return: event_name, event_link: tuple
        """

        ua = fake_useragent.UserAgent()
        headers = {'User-Agent': ua.chrome}
        artist_name = artist_name.lower()
        site = requests.get('https://concert.ua/uk/event/' + artist_name,
                            headers=headers)

        if site.status_code != 200:
            return False

        site.encoding = 'utf-8'
        bs = bs4.BeautifulSoup(site.text, "html.parser")

        event_name = bs.select('.event-main-info__name-value')
        if event_name:
            event_name = event_name[0]
            return event_name.text.strip(), \
                   'https://concert.ua/uk/event/' + artist_name

        return False
Example #18
def parse_table(URL) -> list:
    user = fake_useragent.UserAgent().random
    # URL = 'https://football24.ua/ispaniya_tables_tag50823/'
    HEADERS = {'User-Agent': user}
    response = requests.get(URL, headers=HEADERS)
    soup = BeautifulSoup(response.content, 'lxml')
    block = soup.find('tbody', class_='tableBodyContainer')
    rows = block.find_all('tr')
    teams = list()
    for row in rows:
        if row.get('class') != ['expandable']:
            # logo = _get_photo(row.find('img').get('src'))
            pos = row.find('span', class_='value').get_text(strip=True)
            name = row.find('span', class_='long').get_text(strip=True)
            points = row.find_all('td')
            points = [point.get_text(strip=True) for point in points]
            points = points[3:9] + points[10:11]
            teams.append({
                'logo': 0,
                'pos': pos,
                'name': name,
                'played': points[0],
                'won': points[1],
                'draw': points[2],
                'lost': points[3],
                'for': points[4],
                'against': points[5],
                'points': points[6]
            })
    return teams
Example #19
 def makeRequest(self, url):
     try:
         ua = fake_useragent.UserAgent(verify_ssl=False)
         headers = {
             'User-Agent':
             ua.random,
             'TE':
             'Trailers',
             # 'Referer':url,
             'Cookie':
             'x-zp-client-id=604da560-b85b-4bb9-8490-ff855a04d300; sts_deviceid=1760f31d861fa-0efc091273c9f28-4c3f2779-1049088-1760f31d863ff; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%22662040894%22%2C%22first_id%22%3A%221760f31d87d9-04919f4ca1beeb8-4c3f2779-1049088-1760f31d8804f%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%2C%22%24latest_refe…rt=84a950e77e054854b4d2f9d90826d063; acw_tc=2760826a16065748843918699e79278d6f3843d7bbcabf231429e46fa643ac; ssxmod_itna=eqjxBDcDuQ0=IRDl4iuiFD0ii=eiIhX5PENoeD/KAmDnqD=GFDK40ooO3wQoGC80oTwWYRW8G44ebPn03pQolhEjvF2Yx0aDbqGk3tc4ii9DCeDIDWeDiDG4Gml4GtDpxG=yDm4i3jxGeDe2IODY5DhxDC00PDwx0CjEiKWRFGCa71=tv4xt0DjxG1N40HWi3AoFSEq0H3Ix0k040Oya5kRcYDU74PElrd1gPDmxdDyPE=DiPkqmhUOD0tjoQxWQ=DBO6eZmr3ZAiiQixPeWR5T/rK/ih43BTFZCrKDDcl7YD===; ssxmod_itna2=eqjxBDcDuQ0=IRDl4iuiFD0ii=eiIhX5PENG9t5DRO1DGNewQGaKKjk5txKdMP08DedwD==='
         }
         proxy = Proxy.ipProxy.IpProxy(self.proxyUrl)
         ipList = proxy.getIpList()
         ip = random.choice(ipList)
         proxies = {
             'http': ip,
         }
         response = self.session.get(url,
                                     timeout=30,
                                     headers=headers,
                                     proxies=proxies,
                                     verify=False)
         response.raise_for_status()
         return response.text
     except Exception as ex:
         print(ex)
         return None
Example #20
    def get_html(self):
        for i in range(1, 3423):
            print('Starting the first-level crawl, fetching page {}'.format(i))
            url = self.url.format(i)
            headers = {'User-Agent': fake_useragent.UserAgent().random}

            try:
                if api_settings.USE_PROXY_TO_XICI:
                    proxies = self.get_random_proxy()
                    res = requests.get(url,
                                       headers=headers,
                                       timeout=api_settings.TIME_OUT,
                                       proxies=proxies)
                else:
                    res = requests.get(
                        url,
                        headers=headers,
                        timeout=api_settings.TIME_OUT,
                    )
            except Exception as e:
                print(e)
                continue
            res.encoding = 'utf-8'
            html = res.content
            self.parse_html(html)
Example #21
def logoin(url):
    # Randomly generate a User-Agent
    header = {"User-Agent": fake_useragent.UserAgent().random}
    # Username and password for logging in to yaozh.com, plus the fields the site itself uses for verification: formhash, backurl
    login_data = {
        "username": "******",
        "pwd": "axxcsa",
        "formhash": "1766301C66",
        "backurl": "https%3A%2F%2Fwww.yaozh.com%2F"
    }
    # POST request parameters also need to be URL-encoded; this is part of the HTTP convention
    login_data_encode = urllib.parse.urlencode(login_data)
    request = urllib.request.Request(url,
                                     data=login_data_encode.encode("utf-8"),
                                     headers=header)
    # CookieJar is used to store cookies
    cookie_jar = http.cookiejar.CookieJar()
    # Handler that processes cookies
    cookie_handler = urllib.request.HTTPCookieProcessor(cookie_jar)
    # Opener built with the cookie handler
    cookie_opener = urllib.request.build_opener(cookie_handler)
    ############ Log in to yaozh.com; if the login succeeds, the cookie is saved to memory automatically ############
    cookie_opener.open(request)

    # Reaching this point means the login to yaozh.com succeeded and the cookie has already been saved automatically;
    # now go straight to the member-center page.
    center_url = "https://www.yaozh.com/member/"  # yaozh.com member-center page
    center_request = urllib.request.Request(center_url, headers=header)
    # ----------------------- Note: you may wonder why no cookie appears in this code. The cookie is kept in memory,
    # ----------------------- and cookie_opener can retrieve it, so when open() sends the request the cookie is attached automatically.
    response = cookie_opener.open(center_request)
    htmlsource = response.read().decode("utf-8")
    with open("yaozhiwang.html", "w", encoding="utf-8") as f:
        f.write(htmlsource)
Example #22
 def __init__(self):
     try:
         """
         fake_useragent may fail to download and cache its user-agent data
         """
         ua = fake_useragent.UserAgent()
         self.ua_list = [ua.random for i in range(300)]
     except Exception as e:
         self.ua_list = [
             "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0",
             "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36",
             "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0",
             "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36 QIHU 360EE"
         ]
     self.redis_con = redis.ConnectionPool(host="xxxxx", port=6379, db=12)
     self.r_db = redis.StrictRedis(connection_pool=self.redis_con)
     self.redis_class = "58"
     self.change_cookie = False
     self.cookies_pool = [
         "f=n; commontopbar_new_city_info=2%7C%E4%B8%8A%E6%B5%B7%7Csh; userid360_xml=D22CC785632D31027525A6084FD5335E; time_create=1556176801575; commontopbar_ipcity=cd%7C%E6%88%90%E9%83%BD%7C0; id58=c5/njVyZ0qAqX2HGAyQlAg==; wmda_uuid=bcb64a473057c252e44db5957546d796; wmda_new_uuid=1; wmda_visited_projects=%3B6333604277682; 58tj_uuid=9a82548d-8cdc-40d8-bfc7-12b5d524fed3; new_uv=1; als=0; xxzl_deviceid=nTJEQceHMJYfZXPrsp57h5CoHNWiaD958mj%2Bfrs5Cp1LkUv11%2F%2Fe2jfSyI6Sjx3c; JSESSIONID=24F6C1ADB25A360FB1965102F7A8DBB9; xzfzqtoken=g0yphhiwlkQiBsgXOCR3CDu2LCBW1EA6wWVGuFxDLKMYuZo3deea12niiQgGVR%2Fkin35brBb%2F%2FeSODvMgkQULA%3D%3D"
     ]
     self.error_url = None
     self.burst = 50
     self.sleep_time_min = 5
     self.sleep_time_max = 12
Example #23
def crawler(url, newHeader={}, param=None):
    header = {'User-Agent': fake_useragent.UserAgent().random}
    header.update(newHeader)
    response = requests.get(url, headers=header, params=param)
    response.raise_for_status()
    response.encoding = response.apparent_encoding
    return response.text
Example #24
def fetch(url):
    data = ""
    r = ""
    # p = current_process()
    # if(p.name != 'MainProcess' and p._identity[0] and os.getpid()):
    #     print('process counter:', p._identity[0], 'pid:', os.getpid())
    asession = HTMLSession()
    asession.headers.update({'User-Agent': fake_useragent.UserAgent().random})
    asession.max_redirects = 60
    #parsing from proxy
    # proxy = { 'http': 'http://' + choice(read_file("proxies.txt","\n")) +'/' }
    # asession.proxies.update(proxy)
    unf = uniform(1,6)
    time.sleep(unf)
    try:
        r = asession.request('GET', url, allow_redirects=False)
    except Exception as e:
        print('Failed to get page %s. Reason: %s' % (url, e))
        asession.close()
        return data
    try:
        if r.status_code == 200:
            r.html.render(sleep=2, timeout=200)
            data = r.html
            asession.close()
            return data
        else:
            asession.close()
            return data
    except Exception as e:
        print('Failed to render page %s. Reason: %s' % (url, e))
        asession.close()
        return data
Example #25
def util(district_id, date):
    temp_user_agent = fake_useragent.UserAgent(verify_ssl=False)
    browser_header = {'User-Agent': temp_user_agent.random}
    URL_DIS_ID_DATE = "https://cdn-api.co-vin.in/api/v2/appointment/sessions/public/calendarByDistrict?district_id={}&" \
                      "date={}".format(district_id, date)
    response = requests.get(URL_DIS_ID_DATE, headers=browser_header)
    resp_json = response.json()["centers"]
    if len(resp_json) == 0:
        return "No Data Found"

    results = []
    for item in resp_json:
        result = dict()
        result['name'] = item['name']
        result['address'] = item['address']
        result['block'] = item['block_name']
        result['pincode'] = item['pincode']
        result['fee'] = item['fee_type']
        result['avl'] = item['sessions'][0]['available_capacity']
        result['min_age'] = item['sessions'][0]['min_age_limit']
        result['vaccine'] = item['sessions'][0]['vaccine']
        result['avl_1'] = item['sessions'][0]['available_capacity_dose1']
        result['avl_2'] = item['sessions'][0]['available_capacity_dose2']
        results.append(result)

    return json.dumps({'results': results}, indent=4)
Example #26
def main():
    # Global variables
    global reverse_search
    global ua

    # Fetch some User Agent string from the internet
    ua = fake_useragent.UserAgent()

    # Open our config file
    with open('config.json') as config_file:
        config = json.load(config_file)

    # Loop through the web pages listed in the config file
    for i in config['Sites']:
        try:
            reverse_search = config['Sites'][i]['Reverse search']
        except KeyError:
            reverse_search = False

        result = checker(
                config['Sites'][i]['URL'],
                config['Sites'][i]['Keyword'])

        if result == ':)':
            send_push(
                    "Check out " + i,
                    config['Sites'][i]['URL'],
                    config['Settings']['Pushover token'],
                    config['Settings']['Pushover key'])
Example #27
def parse_table(URL) -> list:
    user = fake_useragent.UserAgent().random
    # URL = 'https://football24.ua/ispaniya_tables_tag50823/'
    HEADERS = {
        'User-Agent': user
    }
    response = requests.get(URL, headers=HEADERS)
    soup = BeautifulSoup(response.content, 'lxml')
    block = soup.find('table', class_='leaguetable sortable table detailed-table')
    block = block.find('tbody')
    rows = block.find_all('tr')
    teams = list()
    for row in rows:
        values = row.find_all('td')
        values = [value.get_text(strip=True) for value in values]
        teams.append({
            'logo': 0,
            'pos': values[0],
            'name': values[2],
            'played': values[3],
            'won': values[4],
            'draw': values[5],
            'lost': values[6],
            'for': values[7],
            'against': values[8],
            'points': values[10]
        })
    return teams
Example #28
def get_last_lotto_num(params=None):
    url = "https://dhlottery.co.kr/gameResult.do?method=byWin"
    ua = fake_useragent.UserAgent()

    hdr = {'User-Agent': str(ua.random)}

    req = requests.get(url, params=params, headers=hdr)
    print('[get_html] url:', req.url)
    print('[get_html] status_code:', req.status_code)

    if req.status_code != 200:
        print("Error!")
        sys.exit(1)

    soup = BeautifulSoup(req.text, 'html.parser')

    last_round = soup.find('div', {'class': 'win_result'}).h4.strong.text

    last_round_date_check1 = soup.find('p', {'class': 'desc'}).text.split("(")
    last_round_date_check2 = last_round_date_check1[1].split(")")
    last_round_date = last_round_date_check2[0]

    last_round_num_check = soup.find('div', {
        'class': 'num win'
    }).p.text.split('\n')
    last_round_num = []
    for i in range(1, 7):
        last_round_num.append(int(last_round_num_check[i]))

    last_round_bonus_num = soup.find('div', {'class': 'num bonus'}).p.text

    return last_round, last_round_date, last_round_num, last_round_bonus_num
Example #29
def time_proxy(ip_addr,
               port,
               proxy_user=None,
               proxy_pass=None,
               url='https://www.redfin.com',
               timeout=10,
               TOTAL_TRIES_PER_URL=2):
    """Check all proxies in proxy.csv for ability to connect
    to redfin.com. Each proxy requests a connection to the site
    twice, and only those with a 100% success rate (2/2) are kept.
    """
    success_counts = 0
    ua = fake_useragent.UserAgent()

    pull_proxies = construct_proxy(ip_addr, port)

    for i in range(TOTAL_TRIES_PER_URL):
        try:
            r = requests.get(url,
                             proxies=pull_proxies,
                             headers={'User-agent': ua.chrome},
                             timeout=timeout)
            if r.status_code == 200:
                success_counts += 1
        except Exception:
            pass

    print('for proxy {}'.format(pull_proxies))
    print('success rate = {}'.format(success_counts / TOTAL_TRIES_PER_URL))
    return success_counts / TOTAL_TRIES_PER_URL
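# A minimal, hypothetical driver sketch for the docstring above: read proxies
# from proxy.csv, time each one, and keep only those that connected 2/2 times.
# The csv import, file name and ip/port column names are assumptions.
def filter_proxies(path='proxy.csv'):
    good = []
    with open(path, newline='') as f:
        for row in csv.DictReader(f):
            rate = time_proxy(row['ip'], row['port'])
            if rate == 1.0:
                good.append((row['ip'], row['port']))
    return good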
Example #30
 def _random_headers(self):
     headers = {
         "dnt": "1",
         "user-agent": fake_useragent.UserAgent().random,
         "origin": "https://www.bing.com",
         "referer": "https://www.bing.com/translator",
     }
     return headers