Beispiel #1
0
def download_proxy(testUrl, proxyFilePath, counts, startPage=1, timeout=5):
    '''
    Collect proxies from xicidaili.com listing pages, test them against
    ``testUrl`` and save the ones that pass to ``proxyFilePath``.

    One ``ProxyGet`` worker is started per listing page and one
    ``ProxyCheck`` worker per batch of up to 10 raw proxies. Every worker of
    a stage is joined before the next stage begins.

    :param testUrl: URL handed to every ``ProxyCheck`` worker
    :param proxyFilePath: file the checked proxies are written to, one per line
    :param counts: number of listing pages to fetch
    :param startPage: number of the first listing page
    :param timeout: timeout handed to every ``ProxyCheck`` worker
    '''
    batchSize = 10

    # Stage 1: one fetch worker per listing page.
    fetchers = []
    for pageNo in xrange(startPage, startPage + counts):
        fetcher = ProxyGet("http://www.xicidaili.com/nn/%d" % pageNo)
        fetchers.append(fetcher)
        fetcher.start()
    for fetcher in fetchers:
        fetcher.join()
    logger.info("raw proxy list - %s" % len(_RAW_PROXY_LIST))

    # Stage 2: one check worker per slice of the raw list.
    logger.debug('check url [-] %s' % testUrl)
    checkers = []
    offset = 0
    while offset < len(_RAW_PROXY_LIST):
        batch = _RAW_PROXY_LIST[offset:offset + batchSize]
        offset += batchSize
        checker = ProxyCheck(batch, testUrl, timeout)
        checkers.append(checker)
        checker.start()
    for checker in checkers:
        checker.join()
    logger.info("checked proxy list - %s" % len(_CHECKED_PROXY_LIST))

    # Stage 3: persist the proxies that passed, one per line.
    with open(proxyFilePath, 'w+') as outFile:
        outFile.writelines(entry + '\n' for entry in _CHECKED_PROXY_LIST)
    logger.info("write to file succeed [-] %s" % proxyFilePath)
Beispiel #2
0
 def getProxy(self):
     '''
     Fetch ``self.target`` and append every proxy found on the page to the
     module-level ``_RAW_PROXY_LIST``.

     The response is decoded as UTF-8 and scanned with ``_PATTERN`` (with
     ``re.DOTALL``). Each match is stored as an ``(agent, ip, port)`` tuple
     built from capture groups 2, 0 and 1 respectively, with the agent
     lower-cased.

     Any exception raised by the HTTP request propagates to the caller.
     '''
     logger.debug('target [-] %s' % self.target)
     # Scope the session so its pooled connections are released even when
     # the request raises.
     with requests.session() as session:
         response = session.get(url=self.target, headers=_HEADERS)
         response.encoding = 'utf-8'
         pageText = response.text
     for row in re.findall(_PATTERN, pageText, re.DOTALL):
         ip = row[0]
         port = row[1]
         agent = row[2].lower()
         _RAW_PROXY_LIST.append((agent, ip, port))
Beispiel #3
0
 def checkProxy(self):
     '''
     Probe every proxy in ``self.proxyList`` and record the ones that work.

     Each entry is indexed as ``(scheme, ip, port)``. ``self.testUrl`` is
     requested through the proxy with ``self.timeout``; when the response
     status is 200 the proxy URL ``scheme://ip:port`` is appended to the
     module-level ``_CHECKED_PROXY_LIST``. Proxies that raise or answer
     with another status are skipped.
     '''
     # Scope the session so its pooled connections are released at the end.
     with requests.session() as session:
         for proxy in self.proxyList:
             proxyUrl = "%s://%s:%s" % (proxy[0], proxy[1], proxy[2])
             try:
                 res = session.get(url=self.testUrl,
                                   proxies={proxy[0]: proxyUrl},
                                   headers=_HEADERS,
                                   timeout=self.timeout)
             except Exception as e:
                 # Best effort: a dead proxy is expected, so skip it. Unlike a
                 # bare ``except:`` this lets KeyboardInterrupt and SystemExit
                 # through.
                 logger.debug('failed [-] %s (%s)' % (proxyUrl, e))
                 continue
             if res.status_code == 200:
                 logger.debug('checked [-] %s' % proxyUrl)
                 _CHECKED_PROXY_LIST.append(proxyUrl)
Beispiel #4
0
 def download(self, method, url, proxyEnable=False, **kwargs):
     '''
     Send an HTTP request with ``requests.request``, optionally through a
     proxy rotated out of ``self.proxies``.

     When ``proxyEnable`` is true and ``self.proxies`` is non-empty, the
     first proxy is moved to the back of the list and used for the request.
     On ``ProxyError`` the next proxy is tried; each proxy is tried at most
     once per call, so a list of dead proxies cannot loop forever. When
     ``proxyEnable`` is true but no proxies are available, a warning is
     logged and the request is sent directly.

     :param method: 'GET','POST','PUT','DELETE','HEAD','OPTIONS'
     :param url: url
     :param proxyEnable: use proxy or not
     :param params: (optional) Dictionary or bytes to be sent in the query string for the :class:`Request`
     :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`
     :param json: (optional) json data to send in the body of the :class:`Request`
     :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`
     :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`
     :param files: (optional) Dictionary of ``'name': file-like-objects`` (or ``{'name': file-tuple}``)
                   for multipart encoding upload. ``file-tuple`` can be a 2-tuple ``('filename', fileobj)``,
                   3-tuple ``('filename', fileobj, 'content_type')`` or a 4-tuple ``('filename', fileobj,
                   'content_type', custom_headers)``, where ``'content-type'`` is a string defining the
                   content type of the given file and ``custom_headers`` a dict-like object containing
                   additional headers to add for the file
     :param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP Auth
     :param timeout: (optional) How long to wait for the server to send data
                     before giving up, as a float, or a :ref:`(connect timeout, read
                     timeout) <timeouts>` tuple <float or tuple>
     :param allow_redirects: (optional) Boolean. Set to True if POST/PUT/DELETE redirect following is allowed <class bool>
     :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy
     :param verify: (optional) whether the SSL cert will be verified. A CA_BUNDLE path can also be provided. Defaults to ``True``
     :param stream: (optional) if ``False``, the response content will be immediately downloaded
     :param cert: (optional) if String, path to ssl client cert file (.pem). If Tuple, ('cert', 'key') pair
     :return: the Response, or ``None`` if the request failed
     '''
     if not proxyEnable or not self.proxies:
         if proxyEnable:
             # A proxy was requested but none is loaded; fall back to direct.
             logger.warning('No initialization proxy file or proxy file is not available')
         try:
             return requests.request(method, url, **kwargs)
         except Exception as e:
             logger.warning(e)
             return None

     # Try each known proxy at most once instead of recursing without bound.
     for _ in range(len(self.proxies)):
         try:
             # Rotate: take the head and put it back at the tail.
             oneProxy = self.proxies.pop(0)
             self.proxies.append(oneProxy)
             # The text before the first ':' is the scheme, which is the key
             # requests expects in the proxies mapping.
             key = oneProxy.split(":")[0]
             logger.debug('USE PROXY [-] %s' % oneProxy)
             return requests.request(method, url, proxies={key: oneProxy}, **kwargs)
         except ProxyError:
             continue
         except Exception as e:
             logger.warning(e)
             return None
     logger.warning('All proxies failed [-] %s' % url)
     return None
Beispiel #5
0
# coding: utf-8

from dplog import logger

# ---- See README.md for the meaning of these parameters ----
# NOTE(review): 10 equals the stdlib logging.DEBUG value; presumably dplog
# uses the same scale -- confirm against README.md.
logger.LOG_LEVEL = 10
# ---- Parameters: console output (changing these is not recommended) ----
logger.IS_CONSOLE = (True, True)
logger.COLOR_ERROR = ('red', None, 'bold')
logger.COLOR_WARNING = ('yellow', None, 'bold')
logger.COLOR_INFO = ('cyan', None, 'bold')
logger.COLOR_DEBUG = ('green', None, 'bold')
# ---- Parameters: writing logs to file ----
logger.FILE_ERROR = None
logger.FILE_WARNING = None
logger.FILE_INFO = None
logger.FILE_DEBUG = None
logger.FILE_LOG = None
# 128 * 1024 * 1024 = 134217728 (128 MiB if the unit is bytes, as the name suggests).
logger.FILE_MAX_BYTES = 128 * 1024 * 1024
logger.FILE_BACKUP_COUNT = 10
# ---- Parameters: log format (changing these is not recommended) ----
logger.LOG_FORMAT = '[%(levelname)s] %(asctime)s %(message)s'
logger.TIME_FORMAT = "%Y-%m-%d %H:%M:%S"
logger.FULL_FILE_PATH = False

# Demo: when run as a script, emit one sample message through each of the
# four logging calls.
if __name__ == '__main__':
    logger.error("123456789")
    logger.warning("123456789")
    logger.debug("123456789")
    logger.info("123456789")