Exemple #1
0
 async def get_proxy(self, timeout=60):
     """Download one page and return every ``ip:port`` string found in it.

     The page address is ``self.url`` formatted with ``self.count``. The
     request is sent with a ``User-Agent`` header taken from
     ``UserAgent.random()``.

     Args:
         timeout: Value forwarded to ``session.get`` as ``timeout``.

     Returns:
         A list of the substrings of the response text that match the
         ``ip:port`` pattern, in the order ``re.findall`` reports them.
     """
     # Dotted quad followed by a colon and a 1-5 digit port.
     ip_port_pattern = r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}'
     request_headers = {'User-Agent': UserAgent.random()}
     async with aiohttp.ClientSession(headers=request_headers) as session:
         page_url = self.url.format(self.count)
         async with session.get(page_url, timeout=timeout) as response:
             page_text = await response.text()
     return re.findall(ip_port_pattern, page_text)
Exemple #2
0
 async def get_proxy(self, timeout=60):
     """Collect ``ip:port`` strings from every page listed in ``self.urls``.

     All pages are fetched through one session that sends a ``User-Agent``
     header taken from ``UserAgent.random()``. On each page, every
     ``<ul class="l2">`` element contributes one entry built from the first
     two ``li`` text nodes beneath it, stripped and joined with ``:``.

     Args:
         timeout: Value forwarded to ``session.get`` as ``timeout``.

     Returns:
         A list with one joined string per matching ``ul`` element, in
         page order.
     """
     request_headers = {'User-Agent': UserAgent.random()}
     collected = []
     async with aiohttp.ClientSession(headers=request_headers) as session:
         for page_url in self.urls:
             async with session.get(page_url, timeout=timeout) as response:
                 page_text = await response.text()
             document = html.fromstring(page_text)
             # Only the first two text nodes of each list are used.
             collected.extend(
                 ':'.join(
                     field.strip(' \t\n')
                     for field in ul_node.xpath('.//li/text()')[0:2]
                 )
                 for ul_node in document.xpath('//ul[@class="l2"]')
             )
     return collected
Exemple #3
0
 async def get_proxy(self, timeout=60):
     """Collect proxy strings from pages 1 through 9 of ``self.url``.

     Each page address is ``self.url.format(page=...)``. All pages are
     fetched through one session that sends a ``User-Agent`` header taken
     from ``UserAgent.random()``. On each page, every ``<td class="ip">``
     cell contributes one entry: all text nodes beneath it, stripped and
     concatenated with no separator.

     Args:
         timeout: Value forwarded to ``session.get`` as ``timeout``.

     Returns:
         A list with one concatenated string per matching cell, in page
         order.
     """
     request_headers = {'User-Agent': UserAgent.random()}
     collected = []
     async with aiohttp.ClientSession(headers=request_headers) as session:
         for page_number in range(1, 10):
             page_url = self.url.format(page=page_number)
             async with session.get(page_url, timeout=timeout) as response:
                 page_text = await response.text()
             document = html.fromstring(page_text)
             # The cell's text is split over several nodes; glue the
             # stripped pieces back together.
             collected.extend(
                 ''.join(
                     piece.strip(' \t\n')
                     for piece in cell.xpath('.//text()')
                 )
                 for cell in document.xpath('//td[@class="ip"]')
             )
     return collected
Exemple #4
0
 async def get_proxy(self, timeout=60):
     """Fetch ``self.url`` and build ``ip:port`` strings from its table rows.

     The request is sent with a ``User-Agent`` header taken from
     ``UserAgent.random()``. Every ``tr`` element except the first one in
     the document contributes one entry: the first two direct ``td`` text
     nodes, stripped and joined with ``:``.

     Args:
         timeout: Value forwarded to ``session.get`` as ``timeout``.

     Returns:
         A list with one joined string per row after the first.
     """
     request_headers = {'User-Agent': UserAgent.random()}
     async with aiohttp.ClientSession(headers=request_headers) as session:
         async with session.get(self.url, timeout=timeout) as response:
             page_text = await response.text()
     document = html.fromstring(page_text)
     # Index 0 is skipped; parsing starts from the second row.
     data_rows = document.xpath('//tr')[1:]
     return [
         ':'.join(
             cell_text.strip(' \t\n')
             for cell_text in row.xpath('./td/text()')[0:2]
         )
         for row in data_rows
     ]
Exemple #5
0
 async def get_proxy(self, timeout=60):
     """Collect ``ip:port`` strings from paginated listings in ``self.urls``.

     For every URL template in ``self.urls``, pages ``1`` through
     ``self.total`` are requested (``url.format(i)``), each with a fresh
     ``User-Agent`` header taken from ``UserAgent.random()``. On each page,
     every ``//tbody/tr`` row contributes one entry built from its
     ``td[@data-title="IP"]`` and ``td[@data-title="PORT"]`` text.

     Args:
         timeout: Value forwarded to ``session.get`` as ``timeout``.

     Returns:
         A list of ``"ip:port"`` strings in the order the rows were seen.
     """
     proxies = []
     # ClientSession must be entered with ``async with``; aiohttp 3.x raises
     # TypeError when it is used in a plain ``with`` statement.
     async with aiohttp.ClientSession() as session:
         for url in self.urls:
             for i in range(1, self.total + 1):
                 headers = {'User-Agent': UserAgent.random()}
                 target = url.format(i)
                 async with session.get(target,
                                        headers=headers,
                                        timeout=timeout) as r:
                     content = await r.text()
                 selector = html.fromstring(content)
                 tr_list = selector.xpath('//tbody/tr')
                 for tr in tr_list:
                     ip = tr.xpath('.//td[@data-title="IP"]/text()')
                     port = tr.xpath('.//td[@data-title="PORT"]/text()')
                     # Skip rows lacking either cell instead of raising
                     # IndexError and losing everything collected so far.
                     if not ip or not port:
                         continue
                     proxies.append(':'.join([ip[0], port[0]]))
                 # Pause between consecutive page requests.
                 await asyncio.sleep(3)
     return proxies
from utils.color import Colored
import httpx
from httpx import Response
from typing import Dict, List, Set, Coroutine, Any, Union
import time
from core.queue import Task
from utils import generate_token, create_folder, dumps_content, loads_content, is_file_exists
from utils import decode_content
from utils.proxy_utls import get_random_proxy
from lxml import etree
from lxml.etree import HTMLParser
from bs4 import BeautifulSoup as bs
from config.constant import ENVIRONMENT, EnvironmentType, GET, POST, TEXT, JSON, CSS
from utils import UserAgent

# Module-level UserAgent instance (UserAgent is imported from utils above).
ua = UserAgent()


# TODO: to use https://www.python-httpx.org/ to download web page
@logme.log(name="Downloader")
class Downloader:
    """Holder for downloader state: a sleep factor and a session container.

    Instances are restricted by ``__slots__`` to the two attributes
    ``sleep_factor`` and ``session_container``.
    """

    __slots__ = ['sleep_factor', 'session_container']

    def __init__(self):
        """Start with ``sleep_factor`` of 2 and an empty ``session_container``."""
        super().__init__()
        self.session_container = {}
        self.sleep_factor = 2

    def close(self):
        """Remove every entry from ``session_container``."""
        # NOTE(review): entries are only dropped from the dict here; if they
        # hold open sessions/clients they are not explicitly closed - confirm.
        self.session_container.clear()
Exemple #7
0
def random_headers():
    """Return a headers dict whose only key is ``'User-Agent'``.

    The value is whatever ``UserAgent.random()`` returns at call time.
    """
    return {'User-Agent': UserAgent.random()}