Example #1
 async def get_proxy(self, timeout=60):
     headers = {'User-Agent': UserAgent.random()}
     async with aiohttp.ClientSession(headers=headers) as session:
         url = self.url.format(self.count)
         async with session.get(url, timeout=timeout) as r:
             content = await r.text()
     return re.findall(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}',
                       content)
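get_proxy here is an instance method: self.url and self.count come from the surrounding crawler class, and the snippet assumes aiohttp, re and the project's UserAgent helper are imported at module level. A minimal driver sketch, assuming a hypothetical FreeProxyCrawler class that supplies those attributes:

import asyncio

# FreeProxyCrawler is an assumed name for illustration; any class that
# defines get_proxy() as in these examples would be driven the same way.
async def main():
    crawler = FreeProxyCrawler()
    proxies = await crawler.get_proxy(timeout=30)
    print(f'fetched {len(proxies)} proxies')

if __name__ == '__main__':
    asyncio.run(main())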
Example #2
 async def get_proxy(self, timeout=60):
     headers = {'User-Agent': UserAgent.random()}
     proxies = []
     async with aiohttp.ClientSession(headers=headers) as session:
         for url in self.urls:
             async with session.get(url, timeout=timeout) as r:
                 content = await r.text()
             selector = html.fromstring(content)
             ul_list = selector.xpath('//ul[@class="l2"]')
             for ul in ul_list:
                 ips = ul.xpath('.//li/text()')[0:2]
                 proxy = ':'.join(map(lambda x: x.strip(' \t\n'), ips))
                 proxies.append(proxy)
     return proxies
Example #3
 async def get_proxy(self, timeout=60):
     headers = {'User-Agent': UserAgent.random()}
     proxies = []
     async with aiohttp.ClientSession(headers=headers) as session:
         for page in range(1, 10):
             url = self.url.format(page=page)
             async with session.get(url, timeout=timeout) as r:
                 content = await r.text()
             selector = html.fromstring(content)
             proxy_list = selector.xpath('//td[@class="ip"]')
             for each_proxy in proxy_list:
                 ips = each_proxy.xpath('.//text()')
                 proxy = ''.join(map(lambda x: x.strip(' \t\n'), ips))
                 proxies.append(proxy)
     return proxies
Example #4
 async def get_proxy(self, timeout=60):
     headers = {
         'User-Agent': UserAgent.random()
     }
     async with aiohttp.ClientSession(headers=headers) as session:
         async with session.get(self.url, timeout=timeout) as r:
             content = await r.text()
     proxies = []
     selector = html.fromstring(content)
     tr_list = selector.xpath('//tr')[1:]
     for tr in tr_list:
         ips = tr.xpath('./td/text()')[0:2]
         proxy = ':'.join(map(lambda x: x.strip(' \t\n'), ips))
         proxies.append(proxy)
     return proxies
Example #5
 async def get_proxy(self, timeout=60):
     proxies = []
     async with aiohttp.ClientSession() as session:
         for url in self.urls:
             for i in range(1, self.total + 1):
                 headers = {'User-Agent': UserAgent.random()}
                 target = url.format(i)
                 async with session.get(target,
                                        headers=headers,
                                        timeout=timeout) as r:
                     content = await r.text()
                 selector = html.fromstring(content)
                 tr_list = selector.xpath('//tbody/tr')
                 for tr in tr_list:
                     ip = tr.xpath('.//td[@data-title="IP"]/text()')
                     port = tr.xpath('.//td[@data-title="PORT"]/text()')
                     proxies.append(':'.join([ip[0], port[0]]))
                 await asyncio.sleep(3)
     return proxies
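Several such sources can be queried concurrently with asyncio.gather; a sketch, assuming a list of crawler instances that each implement get_proxy as above:

import asyncio

async def collect_all(crawlers, timeout=60):
    # crawlers is an assumed list of crawler objects exposing get_proxy()
    results = await asyncio.gather(
        *(c.get_proxy(timeout=timeout) for c in crawlers),
        return_exceptions=True)
    proxies = set()
    for result in results:
        if isinstance(result, Exception):
            continue  # skip sources that failed or timed out
        proxies.update(result)
    return proxies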
Example #6
from utils.color import Colored
import logme
import httpx
from httpx import Response
from typing import Dict, List, Set, Coroutine, Any, Union
import time
from core.queue import Task
from utils import generate_token, create_folder, dumps_content, loads_content, is_file_exists
from utils import decode_content
from utils.proxy_utls import get_random_proxy
from lxml import etree
from lxml.etree import HTMLParser
from bs4 import BeautifulSoup as bs
from config.constant import ENVIRONMENT, EnvironmentType, GET, POST, TEXT, JSON, CSS
from utils import UserAgent

ua = UserAgent()


@logme.log(name="Downloader")
# TODO: to use https://www.python-httpx.org/ to download web page
class Downloader(object):
    __slots__ = ['sleep_factor', 'session_container']

    def __init__(self):
        super().__init__()
        self.sleep_factor = 2
        self.session_container = dict()

    def close(self):
        self.session_container.clear()
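The TODO above points at https://www.python-httpx.org/; a sketch of what a fetch helper on this Downloader might look like with the stock httpx AsyncClient API (the method name fetch and its parameters are assumptions, not part of the original class):

    # Sketch only: a hypothetical method, not defined in the original Downloader.
    async def fetch(self, url: str, timeout: float = 10.0) -> str:
        headers = {'User-Agent': ua.random()}  # assumes UserAgent exposes random()
        async with httpx.AsyncClient(headers=headers, timeout=timeout) as client:
            r: Response = await client.get(url)
            r.raise_for_status()
            return r.text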
Example #7
def random_headers():
    headers = {'User-Agent': UserAgent.random()}
    return headers